UNPKG

cm-tarnation

Version:

An alternative parser for CodeMirror 6

426 lines (372 loc) 12 kB
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at https://mozilla.org/MPL/2.0/. */

import type * as DF from "../definition"
import { Matched } from "../matched"
import { RegExpMatcher } from "../matchers/regexp"
import type { Repository } from "../repository"
import type { GrammarState } from "../state"
import { Rule } from "./rule"

/** A {@link Rule} subclass that uses *other* {@link Rule}s to chain together matches. */
export class Chain extends Rule {
  /** The internal list of rules and their quantifier types. */
  private declare chain: ChainRule[]

  /**
   * A {@link RegExpMatcher} pattern that, if provided, will be used to skip
   * characters which are matched. This can be used to skip whitespace in a
   * chain without making sure every rule actually handles whitespace.
   */
  private declare skip?: RegExpMatcher

  /**
   * Internal {@link ChainContext} used for keeping track of state when
   * checking if this rule matches. A single instance is created in the
   * constructor and reset at the start of every {@link Chain.exec} call.
   */
  private declare context: ChainContext

  /**
   * @param repo - The {@link Repository} to add this rule to.
   * @param rule - The rule definition.
   */
  constructor(repo: Repository, rule: DF.Chain) {
    super(repo, rule)
    this.chain = rule.chain.map(item => parseChainRule(repo, item))
    if (rule.skip) {
      this.skip = new RegExpMatcher(rule.skip, repo.ignoreCase, repo.variables)
    }
    this.context = new ChainContext(this.chain, this.skip)
  }

  /**
   * Runs the chain against the given string.
   *
   * @param str - The string to match.
   * @param pos - The position to start matching at.
   * @param state - The current {@link GrammarState}.
   * @returns A {@link Matched} wrapping every sub-match, or `null` if the
   *   chain failed or finished without collecting any sub-match.
   */
  exec(str: string, pos: number, state: GrammarState) {
    this.context.reset(state, str, pos)
    while (!this.context.done) step(this.context)
    const finished = this.context.finish()
    if (!finished) return null
    return new Matched(state, this.node, this.context.total, pos, finished)
  }
}

/**
 * Step function for a running chain match. A step may or may not advance
 * the chain - this is simply repeated as many times as needed.
 *
 * Every step first consumes anything the skip pattern matches, then
 * handles the current chain item according to its quantifier.
 */
function step(ctx: ChainContext) {
  ctx.skip()
  step: switch (ctx.current[1]) {
    // must match exactly once
    case Quantifier.ONE: {
      const result = ctx.current[0].match(ctx.state, ctx.str, ctx.pos)
      if (result) ctx.advance(result)
      else ctx.fail()
      break
    }

    // may match once, always moves on
    case Quantifier.OPTIONAL: {
      const result = ctx.current[0].match(ctx.state, ctx.str, ctx.pos)
      if (result) ctx.add(result)
      ctx.advance()
      break
    }

    // matches as often as possible, always moves on
    case Quantifier.ZERO_OR_MORE: {
      take(ctx, ctx.current[0])
      ctx.advance()
      break
    }

    // matches as often as possible, fails if it never matched
    case Quantifier.ONE_OR_MORE: {
      const advanced = take(ctx, ctx.current[0])
      if (advanced) ctx.advance()
      else ctx.fail()
      break
    }

    // tries each alternative in order, stopping at the first that matches
    case Quantifier.ALTERNATIVES: {
      const rules = ctx.current[0]
      let advanced = false
      // set once any attempted alternative is a required one
      let couldFail = false
      for (let i = 0; i < rules.length; i++) {
        const type = rules[i][1]
        if (type === Quantifier.ONE || type === Quantifier.ONE_OR_MORE) {
          couldFail = true
        }
        switch (type) {
          case Quantifier.ONE:
          case Quantifier.OPTIONAL: {
            const result = rules[i][0].match(ctx.state, ctx.str, ctx.pos)
            if (result) {
              ctx.add(result)
              advanced = true
            }
            break
          }
          case Quantifier.ZERO_OR_MORE:
          case Quantifier.ONE_OR_MORE: {
            advanced = take(ctx, rules[i][0])
            break
          }
        }
        // leave for loop if we advanced
        if (advanced) break
      }
      if (!advanced && couldFail) ctx.fail()
      else ctx.advance()
      break
    }

    // one match per step; `ctx.advanced` carries progress between steps
    case Quantifier.REPEATING_ZERO_OR_MORE: {
      // leave early once something matched and the next chain item would match
      if (ctx.advanced && ctx.nextMatches()) {
        ctx.advance()
        break
      }
      if (ctx.advanced === null) ctx.advanced = false
      const rules = ctx.current[0]
      for (let i = 0; i < rules.length; i++) {
        const result = rules[i].match(ctx.state, ctx.str, ctx.pos)
        if (result) {
          ctx.add(result)
          ctx.advanced = true
          // stay on this chain item; the next step tries again
          break step
        }
      }
      // nothing matched this step, so this chain item is finished
      ctx.advance()
      ctx.advanced = null
      break
    }

    // same as above, but fails if nothing ever matched
    case Quantifier.REPEATING_ONE_OR_MORE: {
      // leave early once something matched and the next chain item would match
      if (ctx.advanced && ctx.nextMatches()) {
        ctx.advance()
        break
      }
      if (ctx.advanced === null) ctx.advanced = false
      const rules = ctx.current[0]
      for (let i = 0; i < rules.length; i++) {
        const result = rules[i].match(ctx.state, ctx.str, ctx.pos)
        if (result) {
          ctx.add(result)
          ctx.advanced = true
          // stay on this chain item; the next step tries again
          break step
        }
      }
      if (!ctx.advanced) ctx.fail()
      else ctx.advance()
      ctx.advanced = null
      break
    }
  }
}

/**
 * Utility function for running a rule as many times as possible. The skip
 * pattern is applied after every successful match.
 *
 * @returns `true` if the rule matched at least once.
 */
function take(ctx: ChainContext, rule: Rule) {
  let advanced = false
  let result
  while ((result = rule.match(ctx.state, ctx.str, ctx.pos))) {
    ctx.add(result)
    ctx.skip()
    advanced = true
  }
  return advanced
}

/** Types of quantifier for a chain rule. */
enum Quantifier {
  /** No suffix. */
  ONE,
  /** `?` suffix. */
  OPTIONAL,
  /** `*` suffix. */
  ZERO_OR_MORE,
  /** `+` suffix. */
  ONE_OR_MORE,
  /** Chain rule strings separated by `|` pipes. */
  ALTERNATIVES,
  /** Rule names separated by `|*` pipes. */
  REPEATING_ZERO_OR_MORE,
  /** Rule names separated by `|+` pipes. */
  REPEATING_ONE_OR_MORE
}

// prettier-ignore
type ChainRuleSimple = [Rule,
  | Quantifier.ONE
  | Quantifier.OPTIONAL
  | Quantifier.ZERO_OR_MORE
  | Quantifier.ONE_OR_MORE
]

// prettier-ignore
type ChainRule =
  | ChainRuleSimple
  | [ChainRuleSimple[], Quantifier.ALTERNATIVES]
  | [Rule[],
      | Quantifier.REPEATING_ZERO_OR_MORE
      | Quantifier.REPEATING_ONE_OR_MORE
    ]

/** Class used for tracking the state of an in-progress chain match. */
class ChainContext {
  /** The current {@link GrammarState}. */
  declare state: GrammarState

  /** The list of rules to match with. */
  declare rules: ChainRule[]

  /** The current string to match. */
  declare str: string

  /** The current position. */
  declare pos: number

  /** The totality of the string that has been matched so far. */
  declare total: string

  /** The list of results to be returned. */
  declare results: Matched[] | null

  /** The current rule index. */
  declare index: number

  /** If true, the current match has failed. */
  declare failed: boolean

  /** Used for keeping track of state with the `REPEATING` quantifiers. */
  declare advanced: boolean | null

  /**
   * A skip pattern to use.
   *
   * @see {@link Chain}
   */
  declare skipMatcher?: RegExpMatcher

  /**
   * @param rules - The parsed chain items to match with.
   * @param skip - An optional skip pattern.
   */
  constructor(rules: ChainRule[], skip?: RegExpMatcher) {
    this.rules = rules
    this.total = ""
    this.index = 0
    this.failed = false
    this.advanced = null
    this.results = null
    if (skip) this.skipMatcher = skip
  }

  /** True if the running match has finished. */
  get done() {
    return this.index >= this.rules.length
  }

  /** Gets the current rule, based on the current index. */
  get current() {
    if (this.done) throw new Error("Cannot get current rule when done")
    return this.rules[this.index]
  }

  /**
   * Adds a {@link Matched} to the result list, and moves the position and
   * the matched total forward by it.
   */
  add(result: Matched) {
    if (!this.results) this.results = []
    this.results.push(result)
    this.total += result.total
    this.pos += result.length
  }

  /** Sets the match to have failed. This also marks the match as done. */
  fail() {
    this.failed = true
    this.index = this.rules.length
  }

  /**
   * Advances to the next rule.
   *
   * @param result - A result to add to the results list, if desired.
   */
  advance(result?: Matched) {
    if (result) this.add(result)
    this.index++
  }

  /**
   * Returns `null` if the match failed, otherwise a list of {@link Matched}
   * objects will be returned. The list is also `null` if nothing was added.
   */
  finish() {
    return this.failed ? null : this.results
  }

  /**
   * Checks to see if the next rule would match. Used for leaving
   * `REPEATING` quantifier rules early. The check runs against a clone of
   * the state, and returns `false` if the current rule is the last one.
   */
  nextMatches() {
    if (this.done) throw new Error("Cannot get next rule when done")

    const rule: ChainRule | undefined = this.rules[this.index + 1]
    // the current rule is the last in the chain, so there is nothing to
    // leave early for
    if (!rule) return false

    const state = this.state.clone()

    switch (rule[1]) {
      case Quantifier.ONE:
      case Quantifier.OPTIONAL:
      case Quantifier.ZERO_OR_MORE:
      case Quantifier.ONE_OR_MORE: {
        const result = rule[0].match(state, this.str, this.pos)
        if (result) return true
        break
      }

      case Quantifier.ALTERNATIVES: {
        // each alternative is a `[Rule, Quantifier]` pair, only the rule
        // itself is tried
        for (let i = 0; i < rule[0].length; i++) {
          const result = rule[0][i][0].match(state, this.str, this.pos)
          if (result) return true
        }
        break
      }

      case Quantifier.REPEATING_ZERO_OR_MORE:
      case Quantifier.REPEATING_ONE_OR_MORE: {
        for (let i = 0; i < rule[0].length; i++) {
          const result = rule[0][i].match(state, this.str, this.pos)
          if (result) return true
        }
      }
    }

    return false
  }

  /** Greedy consumes any characters matched by the `skip` pattern. */
  skip() {
    if (!this.skipMatcher) return
    let result
    while ((result = this.skipMatcher.match(this.str, this.pos))) {
      // an empty match doesn't move the position, so trying again would
      // never terminate
      if (result.length === 0) break
      this.pos += result.length
      this.total += result.total
    }
  }

  /** Resets the current state with the new match arguments. */
  reset(state: GrammarState, str: string, pos: number) {
    this.state = state
    this.str = str
    this.pos = pos
    this.total = ""
    this.index = 0
    this.failed = false
    this.advanced = null
    this.results = null
  }
}

/**
 * Parses a chain rule string, and returns the rule(s) it specifies and
 * what type of quantifier it uses.
 *
 * @param repo - The {@link Repository} to look rule names up in.
 * @param str - The chain rule string.
 * @throws If a named rule can't be found, or if the string mixes `|`
 *   with `|*`/`|+`, or `|*` with `|+`.
 */
function parseChainRule(repo: Repository, str: string): ChainRule {
  const repeatAlternatives = /\|[*+]/.test(str)
  const normalAlternatives = /\|(?![*+])/.test(str)

  if (repeatAlternatives && normalAlternatives) {
    throw new Error("Cannot mix |* (or |+) and |")
  }

  // normal alternatives
  if (normalAlternatives) {
    // no item can contain a pipe after splitting, so each parses as simple
    const rules = str.split(/\s*\|\s*/).map(item => parseChainRule(repo, item))
    return [rules as ChainRuleSimple[], Quantifier.ALTERNATIVES]
  }

  // repeating alternatives
  if (repeatAlternatives) {
    const zeroOrMore = /\|\*/.test(str)
    const oneOrMore = /\|\+/.test(str)

    if (zeroOrMore && oneOrMore) {
      throw new Error("Cannot have repeating alternatives with both * and +")
    }

    // checked one name at a time so the error names the missing rule
    const rules: Rule[] = str.split(/\s*\|[*+]?\s*/).map(item => {
      const rule = repo.get(item)
      if (!(rule instanceof Rule)) throw new Error(`Rule "${item}" not found`)
      return rule
    })

    return [
      rules,
      zeroOrMore
        ? Quantifier.REPEATING_ZERO_OR_MORE
        : Quantifier.REPEATING_ONE_OR_MORE
    ]
  }

  // a single rule name, with an optional quantifier suffix
  let type = Quantifier.ONE
  // prettier-ignore
  switch (str[str.length - 1]) {
    case "?": type = Quantifier.OPTIONAL; break
    case "*": type = Quantifier.ZERO_OR_MORE; break
    case "+": type = Quantifier.ONE_OR_MORE; break
  }

  if (type !== Quantifier.ONE) {
    str = str.slice(0, str.length - 1)
  }

  const rule = repo.get(str)
  if (!(rule instanceof Rule)) throw new Error(`Rule "${str}" not found`)

  return [rule, type]
}