UNPKG

clarity-pattern-parser

Version:

Parsing library for TypeScript and JavaScript.

594 lines (460 loc) 18.2 kB
import { Node } from "../ast/Node";
import { Cursor } from "./Cursor";
import { ParseResult } from "./ParseResult";
import { Pattern } from "./Pattern";
import { findPattern } from "./findPattern";
import { Sequence } from "./Sequence";
import { Association, PrecedenceTree } from './PrecedenceTree';
import { testPattern } from "./testPattern";
import { execPattern } from "./execPattern";
import { Reference } from "./Reference";

// Module-wide counter used to give every directly constructed Expression a unique id.
let indexId = 0;

/**
 * Pattern that parses operator expressions made of atoms, prefix operators,
 * postfix operators and binary operators.
 *
 * The patterns handed to the constructor are classified lazily (see `build`) by
 * looking at where a child whose name equals this expression's original name
 * (a "recursive reference") appears among each pattern's children:
 *
 * - no recursive reference first or last            -> atom
 * - exactly one recursive reference, in last place  -> prefix
 * - exactly one recursive reference, in first place -> postfix
 * - recursive reference first AND last, > 2 children -> binary
 *
 * The position of an operator pattern in the constructor list is recorded as its
 * precedence value and handed to `PrecedenceTree`, which assembles the final node.
 * A pattern of type "right-associated" marks a binary operator as
 * `Association.right`; every other binary operator is `Association.left`.
 */
export class Expression implements Pattern {
    private _id: string;
    private _type: string;
    private _name: string;
    private _originalName: string;
    private _parent: Pattern | null;
    private _firstIndex: number;
    private _originalPatterns: Pattern[];
    private _patterns: Pattern[];
    private _atomPatterns: Pattern[];
    private _prefixPatterns: Pattern[];
    private _prefixNames: string[];
    private _postfixPatterns: Pattern[];
    private _postfixNames: string[];
    private _binaryPatterns: Pattern[];
    private _binaryNames: string[];
    private _associationMap: Record<string, Association>;
    private _precedenceMap: Record<string, number>;
    private _shouldStopParsing: boolean;
    private _precedenceTree: PrecedenceTree;
    private _hasOrganized: boolean;
    private _atomsIdToAncestorsMap: Record<string, Pattern[]>;

    get id(): string {
        return this._id;
    }

    get type(): string {
        return this._type;
    }

    get name(): string {
        return this._name;
    }

    get parent(): Pattern | null {
        return this._parent;
    }

    set parent(pattern: Pattern | null) {
        this._parent = pattern;
    }

    /** Organized child patterns; empty until `build()` (or the first `parse`) has run. */
    get children(): Pattern[] {
        return this._patterns;
    }

    get prefixPatterns(): readonly Pattern[] {
        return this._prefixPatterns;
    }

    get atomPatterns(): readonly Pattern[] {
        return this._atomPatterns;
    }

    get postfixPatterns(): readonly Pattern[] {
        return this._postfixPatterns;
    }

    get binaryPatterns(): readonly Pattern[] {
        return this._binaryPatterns;
    }

    /** The patterns exactly as they were passed to the constructor. */
    get originalPatterns(): readonly Pattern[] {
        return this._originalPatterns;
    }

    /** Cursor index at which the most recent `parse` call started. */
    get startedOnIndex(): number {
        return this._firstIndex;
    }

    /**
     * @param name Name of the expression; children carrying this name are treated
     *   as recursive references when the patterns are classified.
     * @param patterns Candidate atom/operator patterns; their order defines the
     *   precedence value recorded for each operator.
     * @throws Error when `patterns` is empty.
     */
    constructor(name: string, patterns: Pattern[]) {
        if (patterns.length === 0) {
            throw new Error("Need at least one pattern with an 'expression' pattern.");
        }

        this._id = `expression-${indexId++}`;
        this._type = "expression";
        this._name = name;
        this._originalName = name;
        this._parent = null;
        this._firstIndex = 0;
        this._atomPatterns = [];
        this._prefixPatterns = [];
        this._prefixNames = [];
        this._postfixPatterns = [];
        this._postfixNames = [];
        this._binaryPatterns = [];
        this._binaryNames = [];
        this._associationMap = {};
        this._precedenceMap = {};
        this._originalPatterns = patterns;
        this._shouldStopParsing = false;
        this._hasOrganized = false;
        this._patterns = [];
        this._precedenceTree = new PrecedenceTree({}, {});
        this._atomsIdToAncestorsMap = {};
    }

    /**
     * Sorts `patterns` into atoms, prefixes, postfixes and binaries, records the
     * precedence/association of each operator, and rebuilds the precedence tree.
     * Patterns that match none of the four categories are left out.
     */
    private _organizePatterns(patterns: Pattern[]) {
        const finalPatterns: Pattern[] = [];

        patterns.forEach((pattern, index) => {
            if (this._isAtom(pattern)) {
                const atom = pattern.clone();
                atom.parent = this;

                this._atomPatterns.push(atom);
                finalPatterns.push(atom);
            } else if (this._isPrefix(pattern)) {
                const name = this._extractName(pattern);
                const prefix = this._extractPrefix(pattern);
                prefix.parent = this;

                this._precedenceMap[name] = index;
                this._prefixPatterns.push(prefix);
                this._prefixNames.push(name);
                finalPatterns.push(prefix);
            } else if (this._isPostfix(pattern)) {
                const name = this._extractName(pattern);
                const postfix = this._extractPostfix(pattern);
                postfix.parent = this;

                this._precedenceMap[name] = index;
                this._postfixPatterns.push(postfix);
                this._postfixNames.push(name);
                finalPatterns.push(postfix);
            } else if (this._isBinary(pattern)) {
                const name = this._extractName(pattern);
                const binary = this._extractBinary(pattern);
                binary.parent = this;

                this._precedenceMap[name] = index;
                this._binaryPatterns.push(binary);
                this._binaryNames.push(name);

                if (pattern.type === "right-associated") {
                    this._associationMap[name] = Association.right;
                } else {
                    this._associationMap[name] = Association.left;
                }

                finalPatterns.push(binary);
            }
        });

        this._patterns = finalPatterns;
        this._precedenceTree = new PrecedenceTree(this._precedenceMap, this._associationMap);

        return finalPatterns;
    }

    /**
     * For every atom, records the ancestors of this expression whose id equals the
     * atom's id. `_isBeyondRecursiveAllowance` consults this list while parsing.
     */
    private _cacheAncestors() {
        for (const atom of this._atomPatterns) {
            const id = atom.id;
            const ancestors: Pattern[] = this._atomsIdToAncestorsMap[id] = [];
            let pattern: Pattern | null = this.parent;

            while (pattern != null) {
                if (pattern.id === id) {
                    ancestors.push(pattern);
                }
                pattern = pattern.parent;
            }
        }
    }

    /** Operator name: the wrapped pattern's name for "right-associated", else the pattern's own. */
    private _extractName(pattern: Pattern) {
        if (pattern.type === "right-associated") {
            return pattern.children[0].name;
        }

        return pattern.name;
    }

    /** True when the only recursive reference in the pattern is its last child. */
    private _isPrefix(pattern: Pattern) {
        pattern = this._unwrapAssociationIfNecessary(pattern);

        const lastChild = pattern.children[pattern.children.length - 1];
        const referenceCount = this._referenceCount(pattern);
        const lastChildIsReference = this._isRecursiveReference(lastChild);

        return lastChildIsReference && referenceCount === 1;
    }

    /** Builds the `<name>-prefix` sequence from every child except the trailing reference. */
    private _extractPrefix(pattern: Pattern) {
        pattern = this._unwrapAssociationIfNecessary(pattern);
        return new Sequence(`${pattern.name}-prefix`, pattern.children.slice(0, -1));
    }

    /** True when neither the first nor the last child is a recursive reference. */
    private _isAtom(pattern: Pattern) {
        pattern = this._unwrapAssociationIfNecessary(pattern);

        const firstChild = pattern.children[0];
        const lastChild = pattern.children[pattern.children.length - 1];
        const firstChildIsReference = this._isRecursiveReference(firstChild);
        const lastChildIsReference = this._isRecursiveReference(lastChild);

        return !firstChildIsReference && !lastChildIsReference;
    }

    /** True when the only recursive reference in the pattern is its first child. */
    private _isPostfix(pattern: Pattern) {
        pattern = this._unwrapAssociationIfNecessary(pattern);

        const firstChild = pattern.children[0];
        const referenceCount = this._referenceCount(pattern);
        const firstChildIsReference = this._isRecursiveReference(firstChild);

        return firstChildIsReference && referenceCount === 1;
    }

    /** Builds the `<name>-postfix` sequence from every child except the leading reference. */
    private _extractPostfix(pattern: Pattern) {
        pattern = this._unwrapAssociationIfNecessary(pattern);
        return new Sequence(`${pattern.name}-postfix`, pattern.children.slice(1));
    }

    /** True when first and last children are recursive references and something sits between them. */
    private _isBinary(pattern: Pattern) {
        pattern = this._unwrapAssociationIfNecessary(pattern);

        const firstChild = pattern.children[0];
        const lastChild = pattern.children[pattern.children.length - 1];
        const firstChildIsReference = this._isRecursiveReference(firstChild);
        const lastChildIsReference = this._isRecursiveReference(lastChild);

        return firstChildIsReference && lastChildIsReference && pattern.children.length > 2;
    }

    /** Builds the `<name>-delimiter` sequence from the children between the two references. */
    private _extractBinary(pattern: Pattern) {
        pattern = this._unwrapAssociationIfNecessary(pattern);

        const children = pattern.children.slice(1, -1);
        const binarySequence = new Sequence(`${pattern.name}-delimiter`, children);

        return binarySequence;
    }

    /**
     * Strips a "right-associated" wrapper and resolves a "reference" pattern to the
     * pattern it points at.
     *
     * The reference is temporarily parented to this expression before
     * `getReferencePatternSafely()` is called, and the resolved pattern's parent is
     * reset to null afterwards (presumably so the lookup starts from this
     * expression's position in the tree; confirm against `Reference`).
     */
    private _unwrapAssociationIfNecessary(pattern: Pattern) {
        if (pattern.type === "right-associated") {
            pattern = pattern.children[0];
        }

        if (pattern.type === "reference") {
            pattern.parent = this;
            pattern = (pattern as Reference).getReferencePatternSafely();
            pattern.parent = null;
        }

        return pattern;
    }

    /** Number of direct children that are recursive references. */
    private _referenceCount(pattern: Pattern) {
        return pattern.children.filter(p => this._isRecursiveReference(p)).length;
    }

    /** A pattern counts as a recursive reference when its name equals this expression's original name. */
    private _isRecursiveReference(pattern: Pattern) {
        if (pattern == null) {
            return false;
        }

        return pattern.name === this._originalName;
    }

    /** Classifies the original patterns and caches atom ancestors; runs its work only once. */
    build() {
        if (!this._hasOrganized) {
            this._hasOrganized = true;
            this._organizePatterns(this._originalPatterns);
            this._cacheAncestors();
        }
    }

    /**
     * Parses an expression starting at the cursor's current index.
     *
     * @returns The committed node on success (the cursor is moved to the node's
     *   last index). On failure returns null, restores the cursor to the starting
     *   index and records an error there.
     */
    parse(cursor: Cursor): Node | null {
        this._firstIndex = cursor.index;
        this.build();

        // If there are not any atom nodes then nothing can be found.
        if (this._atomPatterns.length < 1) {
            cursor.moveTo(this._firstIndex);
            cursor.recordErrorAt(this._firstIndex, this._firstIndex, this);
            return null;
        }

        const node = this._tryToParse(cursor);

        if (node != null) {
            node.normalize(this._firstIndex);
            cursor.moveTo(node.lastIndex);
            cursor.resolveError();
            return node;
        }

        cursor.moveTo(this._firstIndex);
        cursor.recordErrorAt(this._firstIndex, this._firstIndex, this);
        return null;
    }

    /**
     * Repeats the cycle prefix* -> atom -> postfix* -> binary until a step sets
     * `_shouldStopParsing` or no atom has been added to the tree, then commits
     * whatever the precedence tree has accumulated.
     */
    private _tryToParse(cursor: Cursor): Node | null {
        this._shouldStopParsing = false;

        while (true) {
            cursor.resolveError();

            this._tryToMatchPrefix(cursor);
            if (this._shouldStopParsing) {
                break;
            }

            this._tryToMatchAtom(cursor);
            if (this._shouldStopParsing) {
                break;
            }

            this._tryToMatchPostfix(cursor);
            if (this._shouldStopParsing) {
                break;
            }

            if (this._precedenceTree.hasAtom()) {
                this._tryToMatchBinary(cursor);
                if (this._shouldStopParsing) {
                    break;
                }
            } else {
                break;
            }
        }

        return this._precedenceTree.commit();
    }

    /** Consumes as many consecutive prefix operators as match, adding each to the tree. */
    private _tryToMatchPrefix(cursor: Cursor) {
        let onIndex = cursor.index;

        for (let i = 0; i < this._prefixPatterns.length; i++) {
            const pattern = this._prefixPatterns[i];
            const name = this._prefixNames[i];
            const node = pattern.parse(cursor);

            if (node != null) {
                this._precedenceTree.addPrefix(name, ...node.children);

                if (cursor.hasNext()) {
                    cursor.next();
                    onIndex = cursor.index;
                    // Restart the scan so another prefix (of any kind) can follow.
                    i = -1;
                    continue;
                } else {
                    this._shouldStopParsing = true;
                    break;
                }
            } else {
                cursor.moveTo(onIndex);
                cursor.resolveError();
            }
        }
    }

    /** Tries each atom in order from the same index and adds the first match to the tree. */
    private _tryToMatchAtom(cursor: Cursor) {
        let onIndex = cursor.index;

        for (let i = 0; i < this._atomPatterns.length; i++) {
            cursor.moveTo(onIndex);

            const pattern = this._atomPatterns[i];

            if (this._isBeyondRecursiveAllowance(pattern, onIndex)) {
                continue;
            }

            const node = pattern.parse(cursor);

            if (node != null) {
                this._precedenceTree.addAtom(node);

                if (cursor.hasNext()) {
                    cursor.next();
                } else {
                    this._shouldStopParsing = true;
                }

                break;
            } else {
                cursor.resolveError();
                cursor.moveTo(onIndex);
            }
        }
    }

    /**
     * True when a cached ancestor of `atom` (same id) started parsing at `onIndex`;
     * such an atom is skipped by `_tryToMatchAtom`.
     */
    private _isBeyondRecursiveAllowance(atom: Pattern, onIndex: number) {
        const ancestors = this._atomsIdToAncestorsMap[atom.id];
        return ancestors.some(a => a.startedOnIndex === onIndex);
    }

    /** Consumes as many consecutive postfix operators as match, adding each to the tree. */
    private _tryToMatchPostfix(cursor: Cursor) {
        let onIndex = cursor.index;

        for (let i = 0; i < this._postfixPatterns.length; i++) {
            const pattern = this._postfixPatterns[i];
            const name = this._postfixNames[i];
            const node = pattern.parse(cursor);

            if (node != null) {
                this._precedenceTree.addPostfix(name, ...node.children);

                if (cursor.hasNext()) {
                    cursor.next();
                    onIndex = cursor.index;
                    // Restart the scan so another postfix (of any kind) can follow.
                    i = -1;
                    continue;
                } else {
                    this._shouldStopParsing = true;
                    break;
                }
            } else {
                cursor.moveTo(onIndex);
                cursor.resolveError();
            }
        }
    }

    /**
     * Tries each binary operator in order from the same index and adds the first
     * match to the tree. Parsing stops when nothing matches or input is exhausted.
     */
    private _tryToMatchBinary(cursor: Cursor) {
        let onIndex = cursor.index;
        let foundMatch = false;

        if (this.binaryPatterns.length === 0) {
            this._shouldStopParsing = true;
        }

        for (let i = 0; i < this._binaryPatterns.length; i++) {
            cursor.moveTo(onIndex);

            const pattern = this._binaryPatterns[i];
            const name = this._binaryNames[i];
            const node = pattern.parse(cursor);

            if (node != null) {
                foundMatch = true;
                this._precedenceTree.addBinary(name, ...node.children);

                if (cursor.hasNext()) {
                    cursor.next();
                } else {
                    this._shouldStopParsing = true;
                }

                break;
            } else {
                cursor.moveTo(onIndex);
                cursor.resolveError();
            }
        }

        if (!foundMatch) {
            this._shouldStopParsing = true;
        }
    }

    /** Delegates to `testPattern` for this pattern. */
    test(text: string, record = false): boolean {
        return testPattern(this, text, record);
    }

    /** Delegates to `execPattern` for this pattern. */
    exec(text: string, record = false): ParseResult {
        return execPattern(this, text, record);
    }

    /**
     * Tokens that can start this expression: prefix tokens followed by atom tokens.
     * Reads the organized pattern lists, which are empty until `build()` has run.
     */
    getTokens(): string[] {
        const atomTokens = this._atomPatterns.flatMap(p => p.getTokens());
        const prefixTokens = this.prefixPatterns.flatMap(p => p.getTokens());

        return [...prefixTokens, ...atomTokens];
    }

    /**
     * Tokens that may follow the given organized child pattern.
     *
     * - after a prefix or binary operator: prefix tokens, then atom tokens
     * - after an atom: postfix tokens, or the binary tokens when there are no postfix tokens
     * - after a postfix operator: postfix tokens, then binary tokens
     *
     * @returns An empty array when `childReference` is not one of this expression's children.
     */
    getTokensAfter(childReference: Pattern): string[] {
        if (this._prefixPatterns.includes(childReference) || this._binaryPatterns.includes(childReference)) {
            const atomTokens = this._atomPatterns.flatMap(p => p.getTokens());
            const prefixTokens = this.prefixPatterns.flatMap(p => p.getTokens());

            return [...prefixTokens, ...atomTokens];
        }

        if (this._atomPatterns.includes(childReference)) {
            // An atom is followed by postfix operators, not prefix operators.
            const postfixTokens = this.postfixPatterns.flatMap(p => p.getTokens());

            if (postfixTokens.length === 0) {
                return this._binaryPatterns.flatMap(p => p.getTokens());
            }

            return postfixTokens;
        }

        if (this._postfixPatterns.includes(childReference)) {
            const postfixTokens = this.postfixPatterns.flatMap(p => p.getTokens());
            const binaryTokens = this._binaryPatterns.flatMap(p => p.getTokens());

            return [...postfixTokens, ...binaryTokens];
        }

        return [];
    }

    /** Tokens that may follow this expression according to its parent; empty without a parent. */
    getNextTokens(): string[] {
        if (this._parent == null) {
            return [];
        }

        return this._parent.getTokensAfter(this);
    }

    /**
     * Patterns that can start this expression: prefix patterns followed by atom patterns.
     * Reads the organized pattern lists, which are empty until `build()` has run.
     */
    getPatterns(): Pattern[] {
        const atomPatterns = this._atomPatterns.flatMap(p => p.getPatterns());
        const prefixPatterns = this.prefixPatterns.flatMap(p => p.getPatterns());

        return [...prefixPatterns, ...atomPatterns];
    }

    /**
     * Patterns that may follow the given organized child pattern; mirrors
     * `getTokensAfter` but yields patterns instead of tokens.
     *
     * @returns An empty array when `childReference` is not one of this expression's children.
     */
    getPatternsAfter(childReference: Pattern): Pattern[] {
        if (this._prefixPatterns.includes(childReference) || this._binaryPatterns.includes(childReference)) {
            const atomPatterns = this._atomPatterns.flatMap(p => p.getPatterns());
            const prefixPatterns = this.prefixPatterns.flatMap(p => p.getPatterns());

            return [...prefixPatterns, ...atomPatterns];
        }

        if (this._atomPatterns.includes(childReference)) {
            // An atom is followed by postfix operators, not prefix operators.
            const postfixPatterns = this.postfixPatterns.flatMap(p => p.getPatterns());

            if (postfixPatterns.length === 0) {
                return this._binaryPatterns.flatMap(p => p.getPatterns());
            }

            return postfixPatterns;
        }

        if (this._postfixPatterns.includes(childReference)) {
            const postfixPatterns = this.postfixPatterns.flatMap(p => p.getPatterns());
            const binaryPatterns = this._binaryPatterns.flatMap(p => p.getPatterns());

            return [...postfixPatterns, ...binaryPatterns];
        }

        return [];
    }

    /** Patterns that may follow this expression according to its parent; empty without a parent. */
    getNextPatterns(): Pattern[] {
        if (this._parent == null) {
            return [];
        }

        return this._parent.getPatternsAfter(this);
    }

    /** Delegates to `findPattern` for this pattern. */
    find(predicate: (p: Pattern) => boolean): Pattern | null {
        return findPattern(this, predicate);
    }

    /**
     * Creates a new, not-yet-built Expression over the same original patterns.
     * The clone keeps this expression's id and original name, so recursive
     * references are still recognised when the clone is given a different `name`.
     */
    clone(name = this._name): Pattern {
        const clone = new Expression(name, this._originalPatterns);

        clone._originalName = this._originalName;
        clone._id = this._id;

        return clone;
    }

    /** True when `pattern` has the same type and each of this expression's children equals the child at the same index. */
    isEqual(pattern: Expression): boolean {
        return pattern.type === this.type && this.children.every((c, index) => c.isEqual(pattern.children[index]));
    }
}