UNPKG

tenko

Version:

A "pixel perfect" 100% spec compliant ES2021 JavaScript parser written in JS.

652 lines (613 loc) 25.7 kB
// Transform the AST to optimize things away at build time
// Will change the given AST inline.
// Not that it matters much but the invariant is that each transform only has access to/can mutate the direct parent.
// So in theory transformations should be safe to apply in parallel as long as a node is not an active parent of a
// currently visited node in a parallel transform.

import { walker } from '../src/tools/walker.mjs';

const SCRUB_OTHERS = process.argv.includes('--no-compat'); // force all occurrences of acornCompat and babelCompat to false
const SCRUB_ERRORS = process.argv.includes('--strip-errors'); // strip error message contents (wip)
const NATIVE_SYMBOLS = process.argv.includes('--native-symbols'); // Replace `PERF_$` with `%`?
const NO_AST = process.argv.includes('--no-ast'); // drop ast related code from the parser (`AST_*`)

// Names of the assert functions whose calls have been stripped so far (each name is only logged once)
const strippedAssertNames = new Set();

// Calls to these functions are replaced by a `0` literal
const assertWhitelist = new Set([
  'ASSERT',
  'ASSERT_VALID',
  'ASSERT_FDS',
  'ASSERT_BINDING_TYPE',
  'ASSERT_LABELSET',
  'ASSERT_ASSIGN_EXPR',
  'ASSERT_BINDING_ORIGIN',
]);

// Calls to these functions are rewritten to the same function without the `ASSERT_` prefix and without first argument
const assertSkipWhitelist = new Set([
  'ASSERT_skip',
  'ASSERT_skipPeek',
  'ASSERT_skipRex',
  'ASSERT_skipDiv',
  'ASSERT_skipAny',
  'ASSERT_skipToParenOpenOrDie',
  'ASSERT_skipToParenOpenCurlyOpen',
  'ASSERT_skipToCurlyOpenOrDie',
  'ASSERT_skipToFromOrDie',
  'ASSERT_skipToStringOrDie',
  'ASSERT_skipToIdentOrDie',
  'ASSERT_skipToArrowOrDie',
  'ASSERT_skipToAsOrDie',
  'ASSERT_skipToAsCommaCurlyClose',
  'ASSERT_skipToAsCommaFrom',
  'ASSERT_skipToColonOrDie',
  'ASSERT_skipToTargetOrDie',
  'ASSERT_skipToStatementStart',
  'ASSERT_skipToExpressionStart',
  'ASSERT_skipToObjectMemberStart',
  'ASSERT_skipToObjectMemberRest',
  'ASSERT_skipToClassMemberStart',
  'ASSERT_skipToClassMemberRest',
  'ASSERT_skipToSwitchBody',
  'ASSERT_skipToBindingStart',
  'ASSERT_skipToBindingStartGrouped',
  'ASSERT_skipToColonParenOpen',
  'ASSERT_skipToIdentParenOpen',
  'ASSERT_skipToIdentStarParenOpen',
  'ASSERT_skipToIdentStarCurlyOpen',
  'ASSERT_skipToIdentCommaCurlyClose',
  'ASSERT_skipToCommaCurlyClose',
  'ASSERT_skipToIdentCurlyOpen',
  'ASSERT_skipToIdentCurlyClose',
  'ASSERT_skipToIdentStarCurlyOpenParenOpenString',
  'ASSERT_skipToAwaitParenOpen',
  'ASSERT_skipToIdentStringNumberSquareOpen',
  'ASSERT_skipToParamStart',
  'ASSERT_skipToExpressionStartSemi',
  'ASSERT_skipToExpressionStartGrouped',
  'ASSERT_skipToAfterNew',
  'ASSERT_skipToExpressionStartSquareCloseComma',
]);

// Constants in this list are still inlined but their declaration is kept
const exportedSymbols = [
  'COLLECT_TOKENS_NONE',
  'COLLECT_TOKENS_SOLID',
  'COLLECT_TOKENS_ALL',
  'COLLECT_TOKENS_TYPES',

  'GOAL_MODULE',
  'GOAL_SCRIPT',

  'WEB_COMPAT_OFF',
  'WEB_COMPAT_ON',

  'VERSION_EXPONENTIATION',
  'VERSION_WHATEVER',
];

// Collect identifier names to inline
// This should contain all constants from specific files with the values they should replace
let constMap = new Map();
let recordingConstants = false;

// Build-time counters that replace update expressions on the `__$flag_*` identifiers
let $flag_lf = 0;
let $flag_start = 0;
let $flag_leaf = 0;
let $flag_group = 7; // keep in sync with tokentype.js

// Static folding of binary expressions whose operands are both literals
const BINARY_FOLDS = new Map([
  ['<<', (a, b) => a << b],
  ['|', (a, b) => a | b],
  ['-', (a, b) => a - b],
  ['+', (a, b) => a + b],
]);

/**
 * Build-time assertion: throws unless `a` is strictly equal to `b`.
 *
 * @param {*} a - actual value
 * @param {*} b - expected value
 * @param {string} [d] - optional description appended to the error message
 * @throws {Error} when `a !== b`
 */
function assert(a, b, d) {
  // This is an assert that can be dropped for a build... It confirms hashing assumptions
  // (Will also be an invaluable tool when adding a new node type ;)
  if (a !== b) throw new Error('Expected `' + b + '`, got `' + a + '`' + (d ? ': ' + d : ''));
}

/**
 * Put `node` in the slot of `parent` that is currently being visited.
 *
 * @param {Object} parent - node that owns the slot
 * @param {string} prop - property name of the slot on `parent`
 * @param {number|undefined} index - array index when `parent[prop]` is a list, `undefined` otherwise
 * @param {Object} node - replacement node
 */
function replace(parent, prop, index, node) {
  if (index === undefined) {
    parent[prop] = node;
  } else {
    parent[prop][index] = node;
  }
}

// Create a Literal node for a primitive value; `raw` is the string form of the value
function makeLiteral(loc, value) {
  return {
    type: 'Literal',
    loc,
    value,
    raw: String(value),
  };
}

// Create an EmptyStatement node; these are swept from blocks and the program body when their parent is left
function makeEmptyStatement(loc) {
  return {
    type: 'EmptyStatement',
    loc,
  };
}

// Remove all empty statements (useless semi colons) from a statement list, in place
function dropEmptyStatements(body) {
  for (let i = body.length - 1; i >= 0; --i) {
    if (body[i] && body[i].type === 'EmptyStatement') {
      body.splice(i, 1);
    }
  }
}

// Build `(arg0 <innerOperator> arg1) <outerOperator> <arg1 | 0>` for a call to one of the flag helpers.
// Note: when comparing against the mask, the mask node object is shared between both positions.
function makeFlagExpression(call, innerOperator, outerOperator, againstZero) {
  const flags = call.arguments[0];
  const mask = call.arguments[1];
  return {
    type: 'BinaryExpression',
    left: {
      type: 'BinaryExpression',
      left: flags,
      operator: innerOperator,
      right: mask,
    },
    operator: outerOperator,
    right: againstZero ? makeLiteral(call.loc, 0) : mask,
  };
}

/**
 * First walker callback. Judging by how it is used here this runs before the children of `node` are
 * walked (NOTE(review): confirm against the walker). Returning `false` prevents further traversal of the node.
 *
 * Strips assert calls, drops import/export declarations, inlines recorded constants and replaces the
 * `__$flag_*` counters with their build-time value.
 */
function onEnter(node, parent, prop, index) {
  switch (node.type) {
    case 'CallExpression': {
      if (node.callee.type !== 'Identifier') break;
      const name = node.callee.name;
      if (!name.startsWith('ASSERT')) break;

      if (assertSkipWhitelist.has(name)) {
        // Remove the ASSERT prefix in the name, this function should already be defined as well
        // The first argument (some aspect of the token to skip we want to assert) must be dropped, too
        node.callee.name = name.slice('ASSERT_'.length);
        node.arguments.shift();
        break;
      }

      if (!strippedAssertNames.has(name)) {
        if (!assertWhitelist.has(name)) {
          throw new Error('assert calls that can be dropped must be whitelisted to prevent accidentally dropping calls to new prefix-only asserts, `' + name + '` was not white listed');
        }
        console.log('Stripping', name);
        strippedAssertNames.add(name);
      }
      replace(parent, prop, index, makeLiteral(node.loc, 0));
      return false; // Prevent further traversal of the node
    }

    case 'ExportAllDeclaration':
    case 'ExportDefaultDeclaration':
    case 'ExportNamedDeclaration':
      // Drop all export declarations
      replace(parent, prop, index, makeEmptyStatement(node.loc));
      return false; // Prevent further traversal of the node

    case 'Identifier':
      if (
        // This check also just validates whether the ident is interesting to us at all, and we can ditch most node
        // type.prop validations because of it. Most but not all.
        constMap.has(node.name) &&
        // Exported symbols still have a const declaration
        (parent.type !== 'VariableDeclarator' || prop === 'init') &&
        // The `const x = dev() ? {X:1} : true` pattern causes this check to be necessary
        (parent.type !== 'Property' || prop !== 'key')
      ) {
        assert(
          ['FunctionDeclaration', 'VariableDeclarator', 'Property', 'ExportSpecifier', 'AssignmentExpression', 'ImportSpecifier'].includes(parent.type) &&
          !['Property.value', 'AssignmentExpression.right', 'VariableDeclarator.init'].includes(parent.type + '.' + prop),
          false,
          'constant names should be unique and not reused in awkward places: ' + parent.type + '.' + prop + ' -> ' + node.name
        );
        // Note: every occurrence receives the very same node object, so they share the locs. But who cares :D *cough*
        replace(parent, prop, index, constMap.get(node.name));
      }
      break;

    case 'Import':
    case 'ImportDeclaration':
    case 'ImportNamespaceSpecifier':
      // Drop all import declarations
      replace(parent, prop, index, makeEmptyStatement(node.loc));
      return false; // Prevent further traversal of the node

    case 'UpdateExpression': {
      if (node.argument.type !== 'Identifier') break;
      // Note: neither the operator nor the prefix flag is checked; any update on a flag counter becomes the
      // incremented counter value
      let flagValue;
      switch (node.argument.name) {
        case '__$flag_lf':
          flagValue = ++$flag_lf;
          break;
        case '__$flag_start':
          flagValue = ++$flag_start;
          break;
        case '__$flag_leaf':
          flagValue = ++$flag_leaf;
          break;
        case '__$flag_group':
          flagValue = ++$flag_group;
          break;
      }
      if (flagValue !== undefined) {
        replace(parent, prop, index, makeLiteral(node.loc, flagValue));
      }
      break;
    }
  }
}

/**
 * Second walker callback. Judging by how it is used here this runs after the children of `node` were
 * walked (NOTE(review): confirm against the walker). Returning `true` requests a revisit of the node.
 *
 * Folds static expressions, prunes dead code, inlines the flag helpers, scrubs dev/compat/AST code
 * depending on the cli flags and records constants when asked to.
 */
function onExit(node, parent, prop, index) {
  switch (node.type) {
    case 'BinaryExpression': {
      if (node.left.type !== 'Literal' || node.right.type !== 'Literal') break;
      assert(typeof node.left.value, typeof node.right.value);
      const fold = BINARY_FOLDS.get(node.operator);
      if (fold) {
        replace(parent, prop, index, makeLiteral(node.loc, fold(node.left.value, node.right.value)));
      } else {
        // Always throws; a static expression on an unsupported operator means a fold should be added
        assert(0, 1, 'can optimize static binary expression on ' + node.operator);
      }
      break;
    }

    case 'BlockStatement': {
      const body = node.body;
      // Don't visit the last element. We don't care if the return/* is the last statement.
      const lastIndex = body.length - 1;
      for (let i = 0; i < lastIndex; ++i) {
        if (body[i] && ['ReturnStatement', 'BreakStatement', 'ContinueStatement', 'ThrowStatement'].includes(body[i].type)) {
          // Prune dead code in a block that has statements following a `return` statement. This can happen due to
          // build artifacts or just development state.
          // We don't rely on stuff in the dead code (like eval or padding to prevent jit stuff) so get rid of it.
          // NOTE(review): this also discards hoisted declarations (functions, `var`) that follow the statement;
          // presumably the transformed sources never do that -- confirm.
          body.length = i + 1; // Discard the rest
          break;
        }
      }
      dropEmptyStatements(body);
      break;
    }

    case 'CallExpression': {
      if (node.callee.type !== 'Identifier') break;
      const name = node.callee.name;

      if (NO_AST) {
        if (name.startsWith('AST_') || name.startsWith('_AST_')) {
          replace(parent, prop, index, makeLiteral(node.loc, 0));
        }
      } else if (name === 'AST_close') {
        // AST_close(start, line, col, 'BlockStatement') -> AST_close(start, line, col), because the names are only used for assertions
        assert(node.arguments.length, 4); // [offset, line, col, node name(s) to close]
        node.arguments.pop(); // Drop the name(s); it's for debugging only
      }

      switch (name) {
        case 'DEVONLY':
          // Scrub `dev()` branching
          replace(parent, prop, index, makeLiteral(node.loc, 0));
          break;

        case 'sansFlag':
          // Basically an alias for `(a | b) ^ b`, to unset all bits in a that are set in b
          replace(parent, prop, index, makeFlagExpression(node, '|', '^', false));
          // Revisit this node because if both arguments are literals then we can fold them up now
          return true;

        case 'hasAllFlags':
          // Basically an alias for `(flags1 & flags2) === flags2`, to check whether at least all bits in b are set in a
          replace(parent, prop, index, makeFlagExpression(node, '&', '===', false));
          // Revisit this node because if both arguments are literals then we can fold them up now
          return true;

        case 'hasAnyFlag':
          // Basically an alias for `(flags1 & flags2) !== 0`, to check whether at least one bit in b is set in a
          replace(parent, prop, index, makeFlagExpression(node, '&', '!==', true));
          // Revisit this node because if both arguments are literals then we can fold them up now
          return true;

        case 'hasNoFlag':
          // Basically an alias for `(flags1 & flags2) === 0`, to check whether none of the bits in b is set in a
          replace(parent, prop, index, makeFlagExpression(node, '&', '===', true));
          // Revisit this node because if both arguments are literals then we can fold them up now
          return true;
      }
      break;
    }

    case 'ConditionalExpression':
      if (node.test.type === 'Literal') {
        // The condition is static so only one branch can ever be taken; keep just that branch
        replace(parent, prop, index, node.test.value ? node.consequent : node.alternate);
      }
      break;

    case 'ExpressionStatement':
      if (node.expression.type === 'Literal' && node.expression.value !== 'use strict') {
        // Could probably drop the strict check, but I guess it can't hurt
        replace(parent, prop, index, makeEmptyStatement(node.loc));
      }
      break;

    case 'FunctionDeclaration': {
      if (!node.id || node.id.type !== 'Identifier') break;
      const funcName = node.id.name;
      const isDroppable =
        funcName.startsWith('ASSERT') ||
        (!NATIVE_SYMBOLS && funcName.startsWith('PERF_')) ||
        (NO_AST && (funcName.startsWith('AST_') || funcName.startsWith('_AST_')));
      if (isDroppable) {
        replace(parent, prop, index, makeEmptyStatement(node.loc));
      } else if (funcName === 'AST_close') {
        assert(node.params.length, 4, 'ast_close has 4 params');
        node.params.pop(); // drop the name; it is only used in ASSERTs
      }
      // Note: a func->arrow transform (with expression bodies for single return/expression statements) was tried
      // here. It slows down load time and probably a micro slower runtime. Not worth it.
      break;
    }

    case 'Identifier':
      if (SCRUB_OTHERS) {
        // Skip property names otherwise you'll get `{false: false}` which is an error in destructuring assignments
        if (parent.type !== 'Property' && (node.name === 'babelCompat' || node.name === 'acornCompat')) {
          replace(parent, prop, index, makeLiteral(node.loc, false));
        }
      }
      break;

    case 'IfStatement':
      if (node.test.type === 'Literal') {
        // The condition is static; keep the branch that is taken or nothing at all if there is no such branch
        if (node.test.value) {
          replace(parent, prop, index, node.consequent);
        } else if (node.alternate) {
          replace(parent, prop, index, node.alternate);
        } else {
          replace(parent, prop, index, makeEmptyStatement(node.loc));
        }
      }
      break;

    case 'Program':
      dropEmptyStatements(node.body);
      break;

    case 'SequenceExpression': {
      // Note: everything except the last element is fair game. Side-effect free expressions should be dropped.
      // (And we assume old invariants still hold; no getters/setters, no proxies, etc. No underwater footguns.)
      for (let i = node.expressions.length - 2; i >= 0; --i) {
        if (node.expressions[i].type === 'Literal') {
          node.expressions.splice(i, 1);
        }
      }
      // Note: there's no real need to drop the node. There's always at least one element (the last one, the actual value
      // that wouldn't be safe to touch), and the serialization of one element is the same as if without the node.
      // However, we do anyways because heuristics may not take a single-element sequence into account (after all,
      // that _is_ an artifact that one wouldn't find in real ASTs)
      if (node.expressions.length === 1) {
        replace(parent, prop, index, node.expressions[0]);
      }
      break;
    }

    case 'UnaryExpression': {
      if (node.argument.type !== 'Literal') break;
      const operand = node.argument.value;
      switch (node.operator) {
        case '!':
          replace(parent, prop, index, makeLiteral(node.loc, !operand));
          break;
        case '~':
          replace(parent, prop, index, makeLiteral(node.loc, ~operand));
          break;
        case 'typeof':
          replace(parent, prop, index, makeLiteral(node.loc, typeof operand));
          break;
        case '-':
          // Note: `-5` is in theory the unary operator `-` with the number `5`. Don't bother with that case.
          if (typeof operand !== 'number') {
            replace(parent, prop, index, makeLiteral(node.loc, -operand));
          }
          break;
        case '+':
          replace(parent, prop, index, makeLiteral(node.loc, +operand));
          break;
        default:
          // Always throws; a static expression on an unsupported operator means a fold should be added
          assert(0, 1, 'can optimize static unary expression on ' + node.operator);
      }
      break;
    }

    case 'VariableDeclaration': {
      if (node.declarations.length !== 1) {
        // Only describe the bindings when the assertion is actually going to fail
        const bindings = node.declarations
          .map((declarator) => (declarator.id.type === 'Identifier' ? declarator.id.name : '<' + declarator.id.type + '>'))
          .join(', ');
        assert(node.declarations.length, 1, 'coding style uses only one binding per declaration (counting a whole destructuring as one) `' + node.kind + ' ' + bindings + '`');
      }

      const decl = node.declarations[0];
      if (decl.id.type === 'Identifier') {
        assert(constMap.has(decl.id.name), false, 'constants should not be redefined for: ' + decl.id.name);
      }

      if (recordingConstants && node.kind === 'const' && decl.id.type === 'Identifier') {
        // A const in a for-in/for-of header has no init, and none of the for-headers can take the empty
        // statement that may replace the declaration below
        assert(
          parent.type !== 'ForInStatement' && parent.type !== 'ForOfStatement' && parent.type !== 'ForStatement',
          true,
          'files from which constants are recorded would not use const inside a for-header'
        );

        const name = decl.id.name;
        constMap.set(name, decl.init); // All constants must have an init as per spec

        if (!exportedSymbols.includes(name)) {
          console.log('Scrubbing the constant for', name);
          // Only remove declarations that are not exported. We still inline the exported symbols, but also need
          // their name to exist for the export template, so leave them.
          replace(parent, prop, index, makeEmptyStatement(node.loc));
        }
      }
      break;
    }
  }
}

/**
 * Apply the build-time transforms to `ast`. The AST is changed in place, nothing is returned.
 *
 * @param {Object} ast - root node of the AST to transform
 * @param {Map<string, Object>} localConstMap - constant name to the AST node that replaces it; this map becomes
 *   the module's active constant map and receives new entries when `recordConstants` is set
 * @param {boolean} recordConstants - when truthy, `const` declarations with an identifier binding are recorded
 *   in the map and, unless their name is an exported symbol, removed from the AST
 * @throws {Error} when an assert call is not whitelisted or one of the build invariants does not hold
 */
export function transform(ast, localConstMap, recordConstants) {
  constMap = localConstMap;
  recordingConstants = recordConstants;

  walker(ast, onEnter, onExit);
}