UNPKG

subscript

Version:

Modular expression parser & evaluator

113 lines (101 loc) 4.92 kB
// ASI: layered onto parser via hooks.
// Parser core has no awareness of `;`, newlines, or block ends — everything
// dialect-specific (statement separator, line-break-sensitive call/access,
// `}` as implicit terminator) lives here.
import { parse, prec, cur, idx, seek } from '../parse.js';

// Character codes the hooks compare against.
const SPACE = 32;
const LF = 10;
const SEMI = 59;
const BLOCK_END = 125;
const BRACKET = 91;
const PAREN = 40;

// Precedence level at which ASI operates. prec.asi may be set before this
// module is imported; otherwise the precedence of `;` is used.
// NOTE(review): unlike STMT below, there is no numeric fallback here, so `lvl`
// is undefined when neither prec.asi nor prec[';'] is set — confirm intended.
const lvl = prec.asi ?? prec[';'];

// Remember the pristine parse.space / parse.step the first time this module
// loads, so a re-import REPLACES the hooks instead of chaining onto a previous
// ASI layer (e.g. `prec.asi=0; await import('./asi.js?v=disabled')` must not
// leave earlier ASI wrappers active).
parse._baseSpace ??= parse.space;
parse._baseStep ??= parse.step;
const baseSpace = parse._baseSpace;
const baseStep = parse._baseStep;

// An AST node here is either an array or a string.
const isNode = (node) => typeof node === 'string' || Array.isArray(node);

// A node is a *statement* (vs an expression) when its operator sits at
// statement precedence — `;` and the statement keywords register at or just
// above prec[';']. A bare `{…}` is a block statement when its body is one.
// Used to decide ASI before `[`/`(`: a statement can't be indexed or called,
// so `{a;b}[x]` and `if(c){}\n(x)` split, while `a\n(b)` stays a call.
const STMT = (prec[';'] ?? 5) + 1;
const isStmt = (node) => {
  if (!Array.isArray(node)) return false;
  const op = node[0];
  // Operators without a registered precedence compare false here.
  if (prec[op] <= STMT) return true;
  return op === '{}' && isStmt(node[1]);
};

// True when an LF occurs in the run of whitespace (char codes <= SPACE)
// starting at position i — used to detect `;\n`. Scanning stops at the first
// non-whitespace char, or at end of input where charCodeAt yields NaN.
const hasLineBreak = (i) => {
  for (let code = cur.charCodeAt(i); code <= SPACE; code = cur.charCodeAt(++i)) {
    if (code === LF) return true;
  }
  return false;
};

// True iff an LF immediately precedes idx (only whitespace between).
// Computed on demand — robust against nested expr() calls eating the LF.
const lineBreak = (i = idx) => {
  // Walk backwards from just before `i` over whitespace only.
  let pos = i;
  while (pos > 0) {
    const code = cur.charCodeAt(--pos);
    // Negated `<=` so a NaN code (out-of-range position) also stops the scan.
    if (!(code <= SPACE)) break;
    if (code === LF) return true;
  }
  return false;
};

// Override space: scan the whitespace region for LFs (parse.js's space is
// LF-agnostic), and swallow `;\n` runs into ASI machinery (avoids deep nary `;`
// recursion on long files). parse.semi records that a hard terminator was
// consumed, so the surrounding expression knows to terminate.
// Returns whatever the base space hook returned for the final position.
parse.space = () => {
  while (true) {
    const start = idx;
    const code = baseSpace();

    // Any LF inside the region the base hook just skipped marks a newline.
    for (let k = start; k < idx; k++) {
      if (cur.charCodeAt(k) === LF) {
        parse.newline = true;
        break;
      }
    }

    // Anything other than `;` followed by a line break is handed back as-is.
    if (code !== SEMI || !hasLineBreak(idx + 1)) return code;

    // Consume the `;`, flag a hard terminator, then skip whitespace again.
    seek(idx + 1);
    parse.semi = true;
    parse.newline = true;
  }
};

// Sub-expression boundary: clear sticky flags on entry so outer state
// doesn't bleed into `(...)`, `[...]`, `{...}`. Top-level parse() also calls
// this with no args. Returns false (the value assigned to both flags).
parse.enter = () => {
  parse.semi = false;
  parse.newline = false;
  return false;
};

// `}` closes a block — outer context sees an implicit newline. A hard semicolon
// consumed inside the block belongs to that block, so the semi flag is dropped.
parse.exit = (p, end) => {
  if (end !== BLOCK_END) return;
  parse.newline = true;
  parse.semi = false;
};

// Wrap iteration step: bail at high prec when `;\n` consumed; fire ASI before
// `[`/`(` that begin a new statement; fire ASI when no operator continues
// across a newline.
parse.step = (a, p, cc, expr) => {
  if (parse.semi && p >= lvl) return false;

  const node = isNode(a);
  // A truthy left side that is neither an array nor a string ends the step.
  if (!node && a) return null;

  if (node) {
    // `[` continues an expression as a member index, but on a new line or
    // after a statement it starts one. `(` continues an expression as a call;
    // it only starts a new statement after a statement node, or — at
    // sub-expression precedence — acts as a boundary on a new line
    // (`let a\n(x)`).
    let split = parse.semi;
    if (!split) {
      if (cc === BRACKET) split = lineBreak() || isStmt(a);
      else if (cc === PAREN) split = isStmt(a) || (lineBreak() && p >= lvl);
    }
    if (split) return asi(a, p, expr) ?? null;
  }

  // Read the newline flag before delegating to the base step.
  const sawNewline = parse.newline;
  const next = baseStep(a, p, cc, expr);
  if (next != null) return next;
  if (!node || !sawNewline) return null;
  return asi(a, p, expr) ?? null;
};

// Runaway-recursion guard: asi recurses once per consecutive statement in a
// block, so depth ~ statement count. Cap well below the JS call-stack limit
// (which blows ~5–10k statements) but high enough for realistic generated code.
let asiDepth = 0;
const MAX_ASI_DEPTH = 2000;

// Parse the next statement and join it with `a` into a `;` list.
// Returns undefined when p >= lvl, when the depth cap is hit, or when the
// inner parse yields nothing / consumes nothing; otherwise returns the `;`
// node (the existing `a` is extended in place when it already is a `;` list).
const asi = parse.asi = (a, p, expr) => {
  if (p >= lvl || asiDepth >= MAX_ASI_DEPTH) return;
  parse.semi = false;

  // Bail if the inner expr didn't actually consume anything. Without this, a
  // lookup handler that returns a non-array sentinel (e.g. switch.js's
  // `reserve` flagging `case`/`default` inside a switch body) lets expr return
  // a truthy token without advancing idx, and the outer ASI loop appends to its
  // semicolon-list forever.
  const startIdx = idx;
  let next;
  asiDepth += 1;
  try {
    next = expr(lvl - 0.5);
  } finally {
    asiDepth -= 1;
  }
  if (!next || idx === startIdx) return;

  // Flatten a nested `;` result so statements stay siblings in one list.
  const stmts = next[0] === ';' ? next.slice(1) : [next];
  if (a?.[0] !== ';') return [';', a, ...stmts];
  a.push(...stmts);
  return a;
};