@wgslx/wgslx
Version:
Extended WebGPU shading language tools
623 lines (487 loc) • 14.8 kB
text/typescript
// Implementation of Recursive Descent Parsing as described in the WGSL spec.
import {inspect} from 'util';
import {
TextMatcher,
createRegExpTextMatcher,
createStringTextMatcher,
} from './patterns';
import {
Cursor,
Sequence,
compareCursor,
cursorGreaterOrEqualThan,
cursorGreaterThan,
} from './sequence';
import {Token} from './token';
/** A successful match: the token produced and the cursor matching resumes from. */
export interface Match {
/** The token produced by the matched rule. */
token: Token;
/** The cursor after the match; SequenceRule hands it to the next rule. */
cursor: Cursor;
}
/** A canary of what should have matched. */
export interface Canary {
/**
 * The rules that should have matched: the rule that failed first, followed by
 * each enclosing rule appended by MatchResult.augmentCanaries.
 */
rules: Rule[];
/** The cursor before the match, i.e. where the failed match was attempted. */
cursor: Cursor;
}
/**
 * Outcome of matching a rule at a cursor: an optional successful match plus
 * canaries describing what should have matched.
 */
export class MatchResult {
  /** The token that matched. */
  match?: Match;

  /** Canaries of what should have matched. */
  canaries: Canary[] = [];

  /**
   * Copies this result. The token is cloned and every canary gets a fresh
   * `rules` array; cursors are shared by reference.
   */
  clone(): MatchResult {
    const result = new MatchResult();
    if (this.match) {
      result.match = {
        token: this.match.token.clone(),
        cursor: this.match.cursor,
      };
    }
    result.canaries = this.canaries.map((c) => ({
      rules: [...c.rules],
      cursor: c.cursor,
    }));
    return result;
  }

  /** Builds a successful result with no canaries. */
  static success(cursor: Cursor, token: Token): MatchResult {
    const result = new MatchResult();
    result.match = {token, cursor};
    return result;
  }

  /**
   * Builds a successful result for `rule` from a nested match, keeping only
   * the canaries that are at or beyond the match cursor.
   */
  static successFrom(
    rule: Rule,
    match: Match,
    canaries: Canary[]
  ): MatchResult {
    const result = new MatchResult();
    result.match = match;
    result.canaries = this.augmentCanaries(rule, canaries, match.cursor);
    return result;
  }

  /** Builds a failed result with a single canary for `rule` at `cursor`. */
  static failure(cursor: Cursor, rule: Rule): MatchResult {
    const result = new MatchResult();
    result.canaries = [{rules: [rule], cursor}];
    return result;
  }

  /**
   * Builds a failed result for `rule` from the canaries of nested failures.
   *
   * @throws Error when `canaries` is missing or empty.
   */
  static failureFrom(rule: Rule, canaries?: Canary[]): MatchResult {
    if (!canaries || canaries.length === 0) {
      throw new Error('Expected canaries');
    }
    const result = new MatchResult();
    result.canaries = this.augmentCanaries(rule, canaries);
    return result;
  }

  /**
   * Reduces `canaries` to the single canary with the furthest cursor and
   * appends `rule` to its rule chain.
   *
   * Among canaries sharing the furthest cursor the shortest rule chain wins;
   * on equal length the later entry wins. The input array is never reordered
   * or otherwise modified.
   *
   * @param rule Enclosing rule to append to the surviving canary.
   * @param canaries Canaries gathered from nested matches.
   * @param cursor When given, canaries before this cursor are discarded first.
   * @returns An empty array when nothing survives, otherwise one canary.
   */
  static augmentCanaries(
    rule: Rule,
    canaries: Canary[],
    cursor?: Cursor
  ): Canary[] {
    let candidates = canaries;
    if (cursor) {
      candidates = canaries.filter((c) =>
        cursorGreaterOrEqualThan(c.cursor, cursor)
      );
    }
    if (candidates.length === 0) {
      return [];
    }

    // A single pass replaces the previous in-place sort, which reordered the
    // caller's array and could return several references to one canary.
    let best = candidates[0];
    for (let i = 1; i < candidates.length; i++) {
      const candidate = candidates[i];
      const order = compareCursor(candidate.cursor, best.cursor);
      if (
        order > 0 ||
        (order === 0 && candidate.rules.length <= best.rules.length)
      ) {
        best = candidate;
      }
    }
    return [{rules: [...best.rules, rule], cursor: best.cursor}];
  }
}
/**
 * Context for rule token matching: the source sequence together with a cache
 * of rule results keyed by stringified cursor and then by rule.
 */
export class Context {
  readonly sequence: Sequence;
  readonly cache = new Map<string, Map<Rule, MatchResult>>();

  constructor(sequence: Sequence) {
    this.sequence = sequence;
  }

  /** Creates a context for `text`, building the sequence via Sequence.from. */
  static from(text: string, file: string): Context {
    const source = Sequence.from(text, file);
    return new Context(source);
  }

  /** Convenience wrapper: builds a context and matches `rootRule` against it. */
  static matchSource(text: string, file: string, rootRule: Rule): MatchResult {
    return Context.from(text, file).matchSource(rootRule);
  }

  /**
   * Matches a text rule directly against the sequence. Results are not cached.
   */
  text(cursor: Cursor, textRule: LiteralRule | RegExpRule): MatchResult {
    // TODO: Optionally cache results, though this is linear cost.
    const origin = this.sequence.stringify(cursor);
    const hit = this.sequence.match(cursor, textRule.matcher);
    return hit
      ? MatchResult.success(hit.cursor, Token.text(hit.text, origin))
      : MatchResult.failure(cursor, textRule);
  }

  /** Matches `rule` at `cursor`, consulting and then filling the cache. */
  rule(cursor: Cursor, rule: Rule): MatchResult {
    const key = this.sequence.stringify(cursor);
    const known = this.get(key, rule);
    if (known === undefined) {
      const fresh = rule.match(cursor, this);
      this.set(key, rule, fresh);
      return fresh;
    }
    return known;
  }

  /** Gets a clone of the cached match for a position and rule, if any. */
  get(cursorKey: string, rule: Rule): MatchResult | undefined {
    return this.cache.get(cursorKey)?.get(rule)?.clone();
  }

  /** Stores a clone of a match for a position and rule in the cache. */
  set(cursorKey: string, rule: Rule, matchResult: MatchResult) {
    const existing = this.cache.get(cursorKey);
    const bucket = existing ?? new Map<Rule, MatchResult>();
    if (!existing) {
      this.cache.set(cursorKey, bucket);
    }
    bucket.set(rule, matchResult.clone());
  }

  /**
   * Matches `rootRule` from the start of the sequence. Canaries are sorted by
   * descending cursor, and a match that stops before the last segment is
   * cleared so the result reads as a failure.
   */
  matchSource(rootRule: Rule): MatchResult {
    const outcome = rootRule.match(Cursor(0), this);
    if (outcome.canaries) {
      outcome.canaries.sort(
        (left, right) => -compareCursor(left.cursor, right.cursor)
      );
    }
    // A match that did not consume the entire sequence counts as a failure.
    if (
      outcome.match &&
      outcome.match.cursor.segment < this.sequence.segments.length
    ) {
      outcome.match = undefined;
    }
    return outcome;
  }
}
/** Base class of all grammar rules; subclasses implement match(). */
export abstract class Rule {
/** The symbol this rule is a part of. */
symbol?: string;
/**
 * Match this cursor against a context.
 *
 * @param cursor Position at which the match is attempted.
 * @param context Context the rule matches against.
 * @returns The match result; its `match` is only set when the rule matched.
 */
abstract match(cursor: Cursor, context: Context): MatchResult;
}
/** A rule, or a string / RegExp shorthand that gets promoted to a rule. */
type FlexRule = string | RegExp | Rule;

/**
 * Promotes a shorthand to a rule: strings go through literal(), regular
 * expressions through regex(), and rules are returned unchanged.
 */
function rulifyOne(rule: FlexRule): Rule {
  if (rule instanceof RegExp) {
    return regex(rule);
  }
  return typeof rule === 'string' ? literal(rule) : rule;
}
/** Promotes every entry of `rules` with rulifyOne, preserving order. */
function rulifyAll(rules: FlexRule[]): Rule[] {
  const promoted: Rule[] = [];
  for (const candidate of rules) {
    promoted.push(rulifyOne(candidate));
  }
  return promoted;
}
/** Text rule backed by a matcher built from literal strings. */
export class LiteralRule extends Rule {
  readonly matcher: TextMatcher;
  readonly literals: string[];

  constructor(literals: string[]) {
    super();
    this.matcher = createStringTextMatcher(...literals);
    this.literals = literals;
  }

  /** Delegates to Context.text, which runs this rule's matcher at `cursor`. */
  match(cursor: Cursor, context: Context): MatchResult {
    const outcome = context.text(cursor, this);
    return outcome;
  }
}
/** Creates a LiteralRule from the given literal strings. */
export function literal(...literals: string[]): Rule {
  const created = new LiteralRule(literals);
  return created;
}
/** Text rule backed by a matcher built from regular expressions. */
export class RegExpRule extends Rule {
  readonly matcher: TextMatcher;
  readonly patterns: RegExp[];

  constructor(patterns: RegExp[]) {
    super();
    this.matcher = createRegExpTextMatcher(...patterns);
    this.patterns = patterns;
  }

  /** Delegates to Context.text, which runs this rule's matcher at `cursor`. */
  match(cursor: Cursor, context: Context): MatchResult {
    const outcome = context.text(cursor, this);
    return outcome;
  }
}
/** Creates a RegExpRule from the given regular expressions. */
export function regex(...patterns: RegExp[]): Rule {
  const created = new RegExpRule(patterns);
  return created;
}
/** Matches its rules one after another, each starting where the previous ended. */
export class SequenceRule extends Rule {
  readonly rules: Rule[];

  constructor(rules: Rule[]) {
    super();
    this.rules = rules;
  }

  /**
   * Fails as soon as one rule fails, passing on every canary gathered so far;
   * otherwise groups the matched tokens under the 'S' tag.
   */
  match(cursor: Cursor, context: Context): MatchResult {
    const collected: Token[] = [];
    const gathered: Canary[] = [];
    let position = cursor;

    for (const step of this.rules) {
      const outcome = context.rule(position, step);
      gathered.push(...outcome.canaries);

      const matched = outcome.match;
      if (!matched) {
        return MatchResult.failureFrom(this, gathered);
      }
      collected.push(matched.token);
      position = matched.cursor;
    }

    const grouped: Match = {
      cursor: position,
      token: Token.group(collected, 'S'),
    };
    return MatchResult.successFrom(this, grouped, gathered);
  }
}
/**
 * Builds a rule matching the given rules in order. A single rule is returned
 * as is rather than being wrapped in a SequenceRule.
 */
export function sequence(first: FlexRule, ...rest: FlexRule[]): Rule {
  const parts = rulifyAll([first, ...rest]);
  switch (parts.length) {
    case 0:
      throw new Error();
    case 1:
      return parts[0];
    default:
      return new SequenceRule(parts);
  }
}
/**
 * Tries every alternative at the same cursor and keeps the match that reaches
 * furthest; on equal reach the earlier alternative wins.
 */
export class UnionRule extends Rule {
  readonly rules: Rule[];

  constructor(rules: Rule[]) {
    super();
    this.rules = rules;
    if (rules.length === 0) {
      throw new Error('Expected at least one rule in a union.');
    }
  }

  /**
   * Returns the furthest match wrapped in a 'U' group, or a failure carrying
   * the canaries of every alternative when none matched.
   */
  match(cursor: Cursor, context: Context): MatchResult {
    const pooled: Canary[] = [];
    let best: Match | undefined;

    for (const alternative of this.rules) {
      const attempt = context.rule(cursor, alternative);
      pooled.push(...attempt.canaries);

      const candidate = attempt.match;
      if (
        candidate &&
        (!best || cursorGreaterThan(candidate.cursor, best.cursor))
      ) {
        best = candidate;
      }
    }

    if (best === undefined) {
      return MatchResult.failureFrom(this, pooled);
    }
    best.token = Token.group([best.token], 'U');
    return MatchResult.successFrom(this, best, pooled);
  }
}
/**
 * Builds a rule matching any one of the given rules. A single rule is
 * returned as is rather than being wrapped in a UnionRule.
 */
export function union(first: FlexRule, ...rest: FlexRule[]): Rule {
  const alternatives = rulifyAll([first, ...rest]);
  return alternatives.length === 1
    ? alternatives[0]
    : new UnionRule(alternatives);
}
/** Optional rule: always succeeds, consuming input only when the inner rule matches. */
export class MaybeRule extends Rule {
  readonly rule: Rule;
  readonly modifiedSymbol?: string;

  constructor(rule: Rule) {
    super();
    this.rule = rule;
    // Remember the symbol name when the optional element is a named symbol.
    if (rule instanceof SymbolRule) {
      this.modifiedSymbol = rule.symbol;
    }
  }

  /**
   * Produces a '?' token holding the inner token when there is one; the
   * cursor stays where it was when the inner rule did not match.
   */
  match(cursor: Cursor, context: Context): MatchResult {
    const attempt = context.rule(cursor, this.rule);
    const inner = attempt.match;
    const tokens = inner ? [inner.token] : [];
    const optional: Match = {
      token: Token.modify(tokens, '?', this.modifiedSymbol),
      cursor: inner ? inner.cursor : cursor,
    };
    return MatchResult.successFrom(this, optional, attempt.canaries);
  }
}
/** Wraps the sequence of the given rules in a MaybeRule. */
export function maybe(first: FlexRule, ...rest: FlexRule[]): Rule {
  return new MaybeRule(sequence(first, ...rest));
}
/** Repetition rule: matches the inner rule zero or more times and always succeeds. */
export class StarRule extends Rule {
  readonly rule: Rule;
  readonly modifiedSymbol?: string;

  /**
   * Repeats the inner rule from `cursor` until it stops matching.
   *
   * Canaries from every attempt are forwarded, not only those of the final
   * failing attempt, so a failure deep inside an earlier repetition can still
   * be reported. This matches how SequenceRule gathers canaries.
   *
   * @returns A successful result holding a '*' token of the matched tokens
   *   and the cursor after the last repetition.
   */
  match(cursor: Cursor, context: Context): MatchResult {
    const tokens: Token[] = [];
    const canaries: Canary[] = [];
    let matchResult = context.rule(cursor, this.rule);
    while (matchResult.match) {
      const next = matchResult.match.cursor;
      // A repetition that consumes nothing would match forever at the same
      // cursor; stop instead of looping without progress.
      if (!cursorGreaterThan(next, cursor)) {
        break;
      }
      tokens.push(matchResult.match.token);
      canaries.push(...matchResult.canaries);
      cursor = next;
      matchResult = context.rule(cursor, this.rule);
    }
    // Canaries of the attempt that ended the repetition.
    canaries.push(...matchResult.canaries);
    const result = {
      token: Token.modify(tokens, '*', this.modifiedSymbol),
      cursor,
    };
    return MatchResult.successFrom(this, result, canaries);
  }

  constructor(rule: Rule) {
    super();
    this.rule = rule;
    // Remember the symbol name when the repeated element is a named symbol.
    if (rule instanceof SymbolRule) {
      this.modifiedSymbol = rule.symbol;
    }
  }
}
/** Wraps the sequence of the given rules in a StarRule. */
export function star(first: FlexRule, ...rest: FlexRule[]): Rule {
  return new StarRule(sequence(first, ...rest));
}
/**
 * A named grammar symbol whose body is assigned after construction through
 * set(). Direct left recursion in the body is rewritten into a non-recursive
 * body plus a tail that is matched repeatedly.
 */
export class SymbolRule extends Rule {
  readonly symbol: string;
  /** Union of the left-recursive alternatives with the leading self reference removed. */
  private leftRecursiveTail?: Rule;
  /** The body; for a left-recursive symbol, only its non-recursive alternatives. */
  private rule?: Rule;

  /** Whether set() found left-recursive alternatives in the body. */
  isLeftRecursive() {
    return !!this.leftRecursiveTail;
  }

  /** Returns the body rule, or null when set() has not been called yet. */
  get(): Rule | null {
    return this.rule ?? null;
  }

  /**
   * Assigns the body of this symbol.
   *
   * When the body is a union, every alternative that is a sequence starting
   * with this symbol is moved, minus that first element, into the
   * left-recursive tail; the remaining alternatives become the body.
   *
   * @throws Error when the body was already assigned, or when every
   *   alternative is left-recursive and no body would remain.
   */
  set(rule: Rule) {
    if (this.rule) {
      throw new Error(`Duplicate initialization of rule ${this.symbol}`);
    }
    //SymbolRule.symbolize(rule, this.symbol);
    // Look for left recursion.
    if (rule instanceof UnionRule) {
      const tail: Rule[] = [];
      const body: Rule[] = [];
      for (const or of rule.rules) {
        if (or instanceof SequenceRule && or.rules[0] === this) {
          // Left recursion detected, remove the recursive element.
          const [, second, ...rest] = or.rules;
          tail.push(sequence(second, ...rest));
          continue;
        }
        body.push(or);
      }
      if (tail.length !== 0) {
        if (body.length === 0) {
          // Without a non-recursive alternative there is nothing to start
          // from; fail here rather than storing an undefined body.
          throw new Error(
            `Rule ${this.symbol} has only left-recursive alternatives`
          );
        }
        const [tail1, ...tailRest] = tail;
        this.leftRecursiveTail = union(tail1, ...tailRest);
        const [body1, ...bodyRest] = body;
        rule = union(body1, ...bodyRest);
      }
    }
    this.rule = rule;
  }

  /**
   * Matches the body once and, for a left-recursive symbol, then keeps
   * matching the tail until it fails.
   *
   * @throws Error when the body has not been assigned with set().
   */
  match(cursor: Cursor, context: Context): MatchResult {
    const bodyRule = this.rule;
    if (!bodyRule) {
      throw new Error(`Uninitialized rule ${this.symbol}`);
    }
    // Match the body once.
    const matchResult = context.rule(cursor, bodyRule);
    if (!matchResult.match) {
      return MatchResult.failureFrom(this, matchResult.canaries);
    }
    if (!this.leftRecursiveTail) {
      matchResult.match.token = Token.symbol(
        matchResult.match.token,
        this.symbol
      );
      return MatchResult.successFrom(
        this,
        matchResult.match,
        matchResult.canaries
      );
    }
    // If left recursive, we keep matching the tail rule until it fails.
    const tailRule = this.leftRecursiveTail;
    const tokens = [matchResult.match.token];
    const canaries: Canary[] = [...matchResult.canaries];
    // Cursor after the last successful body or tail match.
    let end = matchResult.match.cursor;
    let tailMatchResult = context.rule(end, tailRule);
    while (tailMatchResult.match) {
      tokens.push(tailMatchResult.match.token);
      canaries.push(...tailMatchResult.canaries);
      end = tailMatchResult.match.cursor;
      tailMatchResult = context.rule(end, tailRule);
    }
    canaries.push(...tailMatchResult.canaries);
    const token = Token.group(tokens, 'L', this.symbol);
    // Canaries are combined from the initial match and the tail.
    return MatchResult.successFrom(
      this,
      {
        token,
        cursor: end,
      },
      canaries
    );
  }

  constructor(name: string) {
    super();
    this.symbol = name;
  }

  /**
   * Sets `symbol` on `rule` and, recursively, on the rules it contains,
   * stopping at any nested SymbolRule.
   */
  static symbolize(rule: Rule, symbol: string) {
    if (rule instanceof SymbolRule) {
      return;
    }
    rule.symbol = symbol;
    if (rule instanceof StarRule || rule instanceof MaybeRule) {
      SymbolRule.symbolize(rule.rule, symbol);
    }
    if (rule instanceof SequenceRule || rule instanceof UnionRule) {
      for (const subrule of rule.rules) {
        SymbolRule.symbolize(subrule, symbol);
      }
    }
  }
}
/** Named symbol rules created through symbol(), keyed by name. */
const registry = new Map<string, SymbolRule>();

/**
 * Creates a new named symbol rule and records it in the registry so it can
 * be looked up by name later.
 *
 * @param name Name of the symbol; must not have been used before.
 * @throws Error when a symbol with this name was already created.
 */
export function symbol(name: string): SymbolRule {
  if (registry.has(name)) {
    throw new Error(`Duplicate named rule ${name}`);
  }
  const rule = new SymbolRule(name);
  // Without this the registry stays empty and the duplicate check never fires.
  registry.set(name, rule);
  return rule;
}
/**
 * Looks up a named symbol rule in the registry.
 *
 * @param name Name of the symbol to look up.
 * @throws Error when no rule is registered under this name.
 */
export function rule(name: string): SymbolRule {
  const found = registry.get(name);
  if (!found) {
    throw new Error(`Unknown named rule ${name}`);
  }
  return found;
}