// clarity-pattern-parser
// Parsing library for TypeScript and JavaScript.
// Source listing: 790 lines (638 loc), 29.7 kB, text/typescript.
import { Node } from "../ast/Node";
import { Literal } from "../patterns/Literal";
import { Pattern } from "../patterns/Pattern";
import { Regex } from "../patterns/Regex";
import { Reference } from "../patterns/Reference";
import { grammar } from "./patterns/grammar";
import { Options } from "../patterns/Options";
import { Not } from "../patterns/Not";
import { Sequence } from "../patterns/Sequence";
import { Repeat, RepeatOptions } from "../patterns/Repeat";
import { Optional } from "../patterns/Optional";
import { Context } from "../patterns/Context";
import { Expression } from "../patterns/Expression";
import { TakeUntil } from "../patterns/TakeUntil";
import { RightAssociated } from "../patterns/RightAssociated";
import { generateErrorMessage } from "../patterns/generate_error_message";
import { tokens } from "./decorators/tokens";
// Module-wide counter; it starts at zero and is shared by every Grammar instance in this module.
let anonymousIndexId = 0;

/**
 * A decorator receives the pattern it was attached to and, optionally, one argument.
 */
export type Decorator = (
    pattern: Pattern,
    arg?: string | boolean | number | null | Record<string, any> | any[]
) => void;

// Decorators that are always available, keyed by the name used to look them up.
const defaultDecorators = { tokens };
// Lookup of AST node names that represent a pattern definition on the
// right-hand side of an assign statement.
const patternNodes: Record<string, boolean> = [
    "literal",
    "regex-literal",
    "options-literal",
    "sequence-literal",
    "repeat-literal",
    "alias-literal",
    "take-until-literal",
    "configurable-anonymous-pattern"
].reduce<Record<string, boolean>>((lookup, nodeName) => {
    lookup[nodeName] = true;
    return lookup;
}, {});
/**
 * Mutable state for a single parse run: the patterns defined so far, the
 * patterns brought in by imports, the caller-supplied params and the
 * decorators available to the grammar.
 */
class ParseContext {
    patternsByName = new Map<string, Pattern>();
    importedPatternsByName = new Map<string, Pattern>();
    paramsByName = new Map<string, Pattern>();
    decorators: Record<string, Decorator>;

    /**
     * @param params Patterns registered by their own `name` in `paramsByName`.
     * @param decorators Extra decorators; on a name clash the built-in
     * decorators take precedence because they are merged last.
     */
    constructor(params: Pattern[], decorators: Record<string, Decorator> = {}) {
        for (const param of params) {
            this.paramsByName.set(param.name, param);
        }

        this.decorators = Object.assign({}, decorators, defaultDecorators);
    }
}
/**
 * Fallback asynchronous import resolver used when the caller supplies none.
 *
 * Declared `async` so the failure is delivered as a rejected promise, matching
 * the declared `Promise` return type, instead of a synchronous throw.
 *
 * @param _path Resource that was requested (unused).
 * @param _basePath Resource the request originated from (unused).
 * @returns A promise that always rejects with "No import resolver supplied."
 */
async function defaultImportResolver(_path: string, _basePath: string | null): Promise<GrammarFile> {
    throw new Error("No import resolver supplied.");
}
/**
 * Fallback synchronous import resolver used when the caller supplies none.
 * It never returns; it always throws.
 *
 * @param _path Resource that was requested (unused).
 * @param _basePath Resource the request originated from (unused).
 * @throws Error "No import resolver supplied."
 */
function defaultImportResolverSync(_path: string, _basePath: string | null): GrammarFile {
    const reason = "No import resolver supplied.";
    throw new Error(reason);
}
/**
 * Result of resolving an import: the grammar text plus the identifier it was loaded from.
 */
export interface GrammarFile {
// Identifier of the loaded grammar. It becomes the `originResource` of the
// Grammar created for this file, and is therefore what the resolvers receive
// as their second argument when this file's own imports are resolved.
resource: string;
// Grammar source text; this is the string handed to `parse` / `parseString`.
expression: string;
}
/**
 * Optional configuration accepted by the Grammar constructor and its static helpers.
 */
export interface GrammarOptions {
// Asynchronous import resolver, used by `parse` and `import`.
// When omitted, a resolver that fails with "No import resolver supplied." is used.
resolveImport?: (resource: string, originResource: string | null) => Promise<GrammarFile>;
// Synchronous import resolver, used by `parseString`.
// When omitted, a resolver that throws "No import resolver supplied." is used.
resolveImportSync?: (resource: string, originResource: string | null) => GrammarFile;
// Identifier of the grammar being parsed; passed to the resolvers as their
// second argument when an import statement is processed. Defaults to null.
originResource?: string | null;
// Patterns made available to the grammar, looked up by their `name`. Defaults to none.
params?: Pattern[];
// Additional decorators keyed by name; they are merged with the built-in ones.
decorators?: Record<string, Decorator>;
}
/**
 * Turns a grammar expression (text) into a record of named patterns.
 *
 * The expression is parsed into an AST with the `grammar` pattern, import
 * statements are resolved through the configured resolvers, and every assign
 * statement found in the body is converted into a pattern that is registered
 * under its name. The resulting record wraps each pattern in a `Context` that
 * also receives all the other patterns.
 */
export class Grammar {
    /** Single-character escape sequences and the character each one stands for. */
    private static readonly _simpleEscapes = new Map<string, string>([
        ["n", "\n"],
        ["r", "\r"],
        ["t", "\t"],
        ["b", "\b"],
        ["f", "\f"],
        ["v", "\v"],
        ["0", "\0"]
    ]);

    private _params: Pattern[];
    private _originResource?: string | null;
    private _resolveImport: (resource: string, originResource: string | null) => Promise<GrammarFile>;
    private _resolveImportSync: (resource: string, originResource: string | null) => GrammarFile;
    private _parseContext: ParseContext;

    /**
     * @param options Resolvers, params, decorators and origin resource. Every
     * field is optional; missing resolvers fall back to ones that fail with
     * "No import resolver supplied.".
     */
    constructor(options: GrammarOptions = {}) {
        this._params = options.params ?? [];
        this._originResource = options.originResource ?? null;
        this._resolveImport = options.resolveImport ?? defaultImportResolver;
        this._resolveImportSync = options.resolveImportSync ?? defaultImportResolverSync;
        this._parseContext = new ParseContext(this._params, options.decorators ?? {});
    }

    /**
     * Resolves `path` with the asynchronous resolver and parses the grammar it yields.
     *
     * @param path Resource handed to the import resolver (with a `null` origin).
     * @returns The record of patterns defined by the loaded grammar.
     */
    async import(path: string): Promise<Record<string, Pattern>> {
        const grammarFile = await this._resolveImport(path, null);
        const grammar = new Grammar({
            resolveImport: this._resolveImport,
            // Forward the sync resolver as well so nested grammars keep the full configuration.
            resolveImportSync: this._resolveImportSync,
            originResource: grammarFile.resource,
            params: this._params,
            decorators: this._parseContext.decorators
        });

        return grammar.parse(grammarFile.expression);
    }

    /**
     * Parses `expression`, resolving imports asynchronously.
     *
     * @param expression Grammar source text.
     * @returns The record of patterns keyed by name.
     * @throws Error prefixed with "[Invalid Grammar]" when the expression does not parse.
     */
    async parse(expression: string): Promise<Record<string, Pattern>> {
        // Each run starts from a fresh context so earlier parses do not leak into this one.
        this._parseContext = new ParseContext(this._params, this._parseContext.decorators);

        const ast = this._tryToParse(expression);
        await this._resolveImports(ast);
        this._buildPatterns(ast);

        return this._buildPatternRecord();
    }

    /**
     * Parses `expression`, resolving imports synchronously.
     *
     * @param expression Grammar source text.
     * @returns The record of patterns keyed by name.
     * @throws Error prefixed with "[Invalid Grammar]" when the expression does not parse.
     */
    parseString(expression: string): Record<string, Pattern> {
        this._parseContext = new ParseContext(this._params, this._parseContext.decorators);

        const ast = this._tryToParse(expression);
        this._resolveImportsSync(ast);
        this._buildPatterns(ast);

        return this._buildPatternRecord();
    }

    /** Wraps every registered pattern in a `Context` that also receives all the other patterns. */
    private _buildPatternRecord(): Record<string, Pattern> {
        const patterns: Record<string, Pattern> = {};
        const allPatterns = Array.from(this._parseContext.patternsByName.values());

        allPatterns.forEach(p => {
            patterns[p.name] = new Context(p.name, p, allPatterns.filter(o => o !== p));
        });

        return patterns;
    }

    /** Runs the `grammar` pattern over the expression and throws a descriptive error on failure. */
    private _tryToParse(expression: string): Node {
        const { ast, cursor } = grammar.exec(expression, true);

        if (ast == null) {
            const message = generateErrorMessage(grammar, cursor);
            throw new Error(`[Invalid Grammar] ${message}`);
        }

        return ast;
    }

    /** True when the AST contains an "import-block" node that has at least one child. */
    private _hasImports(ast: Node) {
        const importBlock = ast.find(n => n.name === "import-block");
        return importBlock != null && importBlock.children.length > 0;
    }

    /**
     * Converts every assign statement of the body into a registered pattern and
     * then registers a clone of each exported name.
     */
    private _buildPatterns(ast: Node) {
        // Only the body that is not nested under a "head" node is considered.
        const body = ast.find(n => n.name === "body" && n.findAncestor(a => a.name === "head") == null);

        if (body == null) {
            return;
        }

        const statements = body.findAll(n => n.name === "assign-statement");

        statements.forEach((statement) => {
            const patternNode = statement.children.find(c => patternNodes[c.name] != null);

            if (patternNode == null) {
                return;
            }

            switch (patternNode.name) {
                case "literal": {
                    this._saveLiteral(statement);
                    break;
                }
                case "regex-literal": {
                    this._saveRegex(statement);
                    break;
                }
                case "options-literal": {
                    this._saveOptions(statement);
                    break;
                }
                case "sequence-literal": {
                    this._saveSequence(statement);
                    break;
                }
                case "repeat-literal": {
                    this._saveRepeat(statement);
                    break;
                }
                case "alias-literal": {
                    this._saveAlias(statement);
                    break;
                }
                case "take-until-literal": {
                    this._saveTakeUntil(statement);
                    break;
                }
                case "configurable-anonymous-pattern": {
                    this._saveConfigurableAnonymous(statement);
                    break;
                }
                default: {
                    break;
                }
            }
        });

        body.findAll(n => n.name === "export-name").forEach((n) => {
            const pattern = this._getPattern(n.value).clone();
            this._parseContext.patternsByName.set(n.value, pattern);
        });
    }

    /** Registers the literal defined by an assign statement. */
    private _saveLiteral(statementNode: Node) {
        const nameNode = statementNode.find(n => n.name === "name") as Node;
        const literalNode = statementNode.find(n => n.name === "literal") as Node;
        const name = nameNode.value;
        const literal = this._buildLiteral(name, literalNode);

        this._applyDecorators(statementNode, literal);
        this._parseContext.patternsByName.set(name, literal);
    }

    private _buildLiteral(name: string, node: Node) {
        return new Literal(name, this._resolveStringValue(node.value));
    }

    /**
     * Strips the surrounding quote characters from a literal's source text and
     * resolves its backslash escapes.
     *
     * Escapes are resolved in a single left-to-right pass so that an escaped
     * backslash is never re-read as the start of another escape: a backslash
     * pair followed by `n` yields a backslash and the letter `n`, not a newline.
     *
     * Supported: `\n \r \t \b \f \v \0`, `\xHH`, `\uHHHH`; any other escaped
     * character resolves to the character itself.
     *
     * @param value Literal text including its first and last (quote) character.
     */
    private _resolveStringValue(value: string) {
        return value
            .slice(1, -1)
            .replace(
                /\\(?:x([0-9A-Fa-f]{2})|u([0-9A-Fa-f]{4})|([\s\S]))/g,
                (match: string, hex2?: string, hex4?: string, char?: string) => {
                    const hex = hex2 ?? hex4;

                    if (hex != null) {
                        return String.fromCharCode(parseInt(hex, 16));
                    }

                    if (char == null) {
                        return match;
                    }

                    return Grammar._simpleEscapes.get(char) ?? char;
                }
            );
    }

    /** Registers the regex defined by an assign statement. */
    private _saveRegex(statementNode: Node) {
        const nameNode = statementNode.find(n => n.name === "name") as Node;
        const regexNode = statementNode.find(n => n.name === "regex-literal") as Node;
        const name = nameNode.value;
        const regex = this._buildRegex(name, regexNode);

        this._applyDecorators(statementNode, regex);
        this._parseContext.patternsByName.set(name, regex);
    }

    private _buildRegex(name: string, node: Node) {
        // Drop the first and last character (the regex delimiters).
        const value = node.value.slice(1, node.value.length - 1);
        return new Regex(name, value);
    }

    /** Registers the options (or expression) pattern defined by an assign statement. */
    private _saveOptions(statementNode: Node) {
        const nameNode = statementNode.find(n => n.name === "name") as Node;
        const name = nameNode.value;
        const optionsNode = statementNode.find(n => n.name === "options-literal") as Node;
        const options = this._buildOptions(name, optionsNode);

        this._applyDecorators(statementNode, options);
        this._parseContext.patternsByName.set(name, options);
    }

    /**
     * Builds an `Expression` when one of the alternatives looks recursive and the
     * options are not greedy; otherwise (or when the `Expression` cannot be
     * constructed) builds an `Options` pattern.
     */
    private _buildOptions(name: string, node: Node) {
        const optionNodes = node.children.filter(n => n.name !== "default-divider" && n.name !== "greedy-divider");
        const isGreedy = node.find(n => n.name === "greedy-divider") != null;

        const patterns = optionNodes.map(optionNode => {
            const isRightAssociated = optionNode.find(n => n.name === "right-associated") != null;
            const pattern = this._buildPattern(optionNode.children[0]);

            return isRightAssociated ? new RightAssociated(pattern) : pattern;
        });

        const hasRecursivePattern = patterns.some(p => this._isRecursive(name, p));

        if (hasRecursivePattern && !isGreedy) {
            try {
                return new Expression(name, patterns);
            } catch {
                // Deliberate best effort: fall back to plain Options below.
            }
        }

        return new Options(name, patterns, isGreedy);
    }

    /** Unwraps a right-associated pattern before running the recursion check. */
    private _isRecursive(name: string, pattern: Pattern) {
        if (pattern.type === "right-associated") {
            pattern = pattern.children[0];
        }

        return this._isRecursivePattern(name, pattern);
    }

    private _isRecursivePattern(name: string, pattern: Pattern) {
        // Because we don't know if the pattern is a sequence with a reference we have to just assume it is.
        // The better solution here would be to not have options at all and just use the expression pattern instead.
        if (pattern.type === "reference") {
            return true;
        }

        if (pattern.children.length === 0) {
            return false;
        }

        const firstChild = pattern.children[0];
        const lastChild = pattern.children[pattern.children.length - 1];
        const isLongEnough = pattern.children.length >= 2;

        return pattern.type === "sequence" && isLongEnough &&
            (firstChild.name === name ||
                lastChild.name === name);
    }

    /**
     * Builds the pattern described by an inline (unnamed) AST node.
     *
     * @throws Error when the node's name is not a known pattern node.
     */
    private _buildPattern(node: Node): Pattern {
        const type = node.name;
        // The counter advances on every call, including calls that do not use the name.
        const name = `anonymous-pattern-${anonymousIndexId++}`;

        switch (type) {
            case "pattern-name": {
                return this._getPattern(node.value).clone();
            }
            case "literal": {
                // Inline literals and regexes are named after their own text (without delimiters).
                return this._buildLiteral(node.value.slice(1, -1), node);
            }
            case "regex-literal": {
                return this._buildRegex(node.value.slice(1, -1), node);
            }
            case "repeat-literal": {
                return this._buildRepeat(name, node);
            }
            case "options-literal": {
                return this._buildOptions(name, node);
            }
            case "sequence-literal": {
                return this._buildSequence(name, node);
            }
            case "take-until-literal": {
                return this._buildTakeUntil(name, node);
            }
            case "complex-anonymous-pattern": {
                return this._buildComplexAnonymousPattern(node);
            }
        }

        throw new Error(`Couldn't build node: ${node.name}.`);
    }

    /** Registers the sequence defined by an assign statement. */
    private _saveSequence(statementNode: Node) {
        const nameNode = statementNode.find(n => n.name === "name") as Node;
        const name = nameNode.value;
        const sequenceNode = statementNode.find(n => n.name === "sequence-literal") as Node;
        const sequence = this._buildSequence(name, sequenceNode);

        this._applyDecorators(statementNode, sequence);
        this._parseContext.patternsByName.set(name, sequence);
    }

    /**
     * Builds a `Sequence` from the non-divider children; each element may be
     * wrapped in `Optional` and/or `Not` depending on its markers.
     */
    private _buildSequence(name: string, node: Node) {
        const elementNodes = node.children.filter(n => n.name !== "sequence-divider");

        const patterns = elementNodes.map(elementNode => {
            // When the element starts with a "not" marker the pattern is the second child.
            const patternNode = elementNode.children[0].name === "not" ? elementNode.children[1] : elementNode.children[0];
            const isNot = elementNode.find(n => n.name === "not") != null;
            const isOptional = elementNode.find(n => n.name === "is-optional") != null;
            const pattern = this._buildPattern(patternNode);
            const finalPattern = isOptional ? new Optional(`optional-${pattern.name}`, pattern) : pattern;

            if (isNot) {
                return new Not(`not-${finalPattern.name}`, finalPattern);
            }

            return finalPattern;
        });

        return new Sequence(name, patterns);
    }

    /** Registers the repeat defined by an assign statement. */
    private _saveRepeat(statementNode: Node) {
        const nameNode = statementNode.find(n => n.name === "name") as Node;
        const name = nameNode.value;
        const repeatNode = statementNode.find(n => n.name === "repeat-literal") as Node;
        const repeat = this._buildRepeat(name, repeatNode);

        this._applyDecorators(statementNode, repeat);
        this._parseContext.patternsByName.set(name, repeat);
    }

    /**
     * Builds a `Repeat` (min 1, max Infinity unless bounds or an exact count say
     * otherwise). A quantifier shorthand other than "+" wraps the repeat in `Optional`.
     */
    private _buildRepeat(name: string, repeatNode: Node) {
        const bounds = repeatNode.find(n => n.name === "bounds");
        const exactCount = repeatNode.find(n => n.name === "exact-count");
        const quantifier = repeatNode.find(n => n.name === "quantifier-shorthand");
        const trimDivider = repeatNode.find(n => n.name === "trim-flag") != null;
        // NOTE(review): this compares `.type` while every other check in this class
        // compares `.name` — confirm that a leading spaces node really has type "spaces".
        const patternNode = repeatNode.children[1].type === "spaces" ? repeatNode.children[2] : repeatNode.children[1];
        const pattern = this._buildPattern(patternNode);
        const dividerSectionNode = repeatNode.find(n => n.name === "repeat-divider-section");

        const options: RepeatOptions = {
            min: 1,
            max: Infinity
        };

        if (trimDivider) {
            options.trimDivider = trimDivider;
        }

        if (dividerSectionNode != null) {
            const dividerNode = dividerSectionNode.children[1];
            options.divider = this._buildPattern(dividerNode);
        }

        let isOptional = false;

        if (bounds != null) {
            const minNode = bounds.find(p => p.name === "min");
            const maxNode = bounds.find(p => p.name === "max");

            // A missing bound means "unbounded" on that side.
            options.min = minNode == null ? 0 : Number(minNode.value);
            options.max = maxNode == null ? Infinity : Number(maxNode.value);
        } else if (exactCount != null) {
            const integerNode = exactCount.find(p => p.name === "integer") as Node;
            const integer = Number(integerNode.value);

            options.min = integer;
            options.max = integer;
        } else if (quantifier != null) {
            // "+" keeps the default of one-or-more; any other shorthand makes the repeat optional.
            isOptional = quantifier.value !== "+";
        }

        return isOptional
            ? new Optional(name, new Repeat(`inner-optional-${name}`, pattern, options))
            : new Repeat(name, pattern, options);
    }

    /** Registers the take-until pattern defined by an assign statement. */
    private _saveTakeUntil(statementNode: Node) {
        const nameNode = statementNode.find(n => n.name === "name") as Node;
        const name = nameNode.value;
        const takeUntilNode = statementNode.find(n => n.name === "take-until-literal") as Node;
        const takeUntil = this._buildTakeUntil(name, takeUntilNode);

        this._applyDecorators(statementNode, takeUntil);
        this._parseContext.patternsByName.set(name, takeUntil);
    }

    private _buildTakeUntil(name: string, takeUntilNode: Node) {
        // The terminating pattern is the last child of the take-until node.
        const patternNode = takeUntilNode.children[takeUntilNode.children.length - 1];
        const untilPattern = this._buildPattern(patternNode);

        return new TakeUntil(name, untilPattern);
    }

    /** Registers the configurable anonymous pattern defined by an assign statement. */
    private _saveConfigurableAnonymous(node: Node) {
        const nameNode = node.find(n => n.name === "name") as Node;
        const name = nameNode.value;
        const anonymousNode = node.find(n => n.name === "configurable-anonymous-pattern") as Node;
        // NOTE(review): optionality is derived from the statement's second child, not from
        // `anonymousNode` — confirm against the grammar's AST shape that this is intended.
        const isOptional = node.children[1] != null;
        const inner = this._buildPattern(anonymousNode.children[0]);
        const anonymous = isOptional ? new Optional(name, inner) : inner;

        this._applyDecorators(node, anonymous);
        this._parseContext.patternsByName.set(name, anonymous);
    }

    private _buildComplexAnonymousPattern(node: Node) {
        // Skip a "line-spaces" node that may follow the first child.
        const wrappedNode = node.children[1].name === "line-spaces" ? node.children[2] : node.children[1];
        return this._buildPattern(wrappedNode);
    }

    /**
     * Processes import and param-default statements in document order.
     * Imports are awaited one at a time because each statement reads and
     * updates the set of already imported patterns.
     */
    private async _resolveImports(ast: Node) {
        const importStatements = ast.findAll(n => {
            return n.name === "import-from" || n.name === "param-name-with-default-value";
        });

        for (const statement of importStatements) {
            if (statement.name === "import-from") {
                await this._processImport(statement);
            } else {
                this._processUseParams(statement);
            }
        }
    }

    /** Synchronous counterpart of `_resolveImports`. */
    private _resolveImportsSync(ast: Node) {
        const importStatements = ast.findAll(n => {
            return n.name === "import-from" || n.name === "param-name-with-default-value";
        });

        for (const statement of importStatements) {
            if (statement.name === "import-from") {
                this._processImportSync(statement);
            } else {
                this._processUseParams(statement);
            }
        }
    }

    /**
     * Loads the grammar referenced by an import statement with the synchronous
     * resolver and registers the imported names.
     *
     * @throws Error "Failed loading expression from: ..." when parsing the
     * imported grammar or registering its names fails.
     */
    private _processImportSync(importStatement: Node) {
        const parseContext = this._parseContext;
        const resourceNode = importStatement.find(n => n.name === "resource") as Node;
        const params = this._getParams(importStatement);
        const resource = resourceNode.value.slice(1, -1);
        const grammarFile = this._resolveImportSync(resource, this._originResource || null);
        const grammar = new Grammar({
            resolveImport: this._resolveImport,
            resolveImportSync: this._resolveImportSync,
            originResource: grammarFile.resource,
            params,
            decorators: this._parseContext.decorators
        });

        try {
            const patterns = grammar.parseString(grammarFile.expression);
            this._registerImportedPatterns(parseContext, importStatement, patterns, resource);
        } catch (e: unknown) {
            throw new Error(`Failed loading expression from: "${resource}". Error details: "${Grammar._describeError(e)}"`);
        }
    }

    /**
     * Loads the grammar referenced by an import statement with the asynchronous
     * resolver and registers the imported names.
     *
     * @throws Error "Failed loading expression from: ..." when parsing the
     * imported grammar or registering its names fails.
     */
    private async _processImport(importStatement: Node) {
        // Captured before awaiting so registration targets the context this run started with.
        const parseContext = this._parseContext;
        const resourceNode = importStatement.find(n => n.name === "resource") as Node;
        const params = this._getParams(importStatement);
        const resource = resourceNode.value.slice(1, -1);
        const grammarFile = await this._resolveImport(resource, this._originResource || null);
        const grammar = new Grammar({
            resolveImport: this._resolveImport,
            resolveImportSync: this._resolveImportSync,
            originResource: grammarFile.resource,
            params,
            decorators: this._parseContext.decorators
        });

        try {
            const patterns = await grammar.parse(grammarFile.expression);
            this._registerImportedPatterns(parseContext, importStatement, patterns, resource);
        } catch (e: unknown) {
            throw new Error(`Failed loading expression from: "${resource}". Error details: "${Grammar._describeError(e)}"`);
        }
    }

    /**
     * Copies the names requested by an import statement from `patterns` into
     * the context's imported patterns, honouring aliases.
     *
     * @param parseContext Context that receives the imported patterns.
     * @param importStatement The "import-from" node being processed.
     * @param patterns Patterns produced by the imported grammar.
     * @param resource Resource name, used in error messages only.
     * @throws Error when a local name is already taken by another import or the
     * requested pattern does not exist in `patterns`.
     */
    private _registerImportedPatterns(
        parseContext: ParseContext,
        importStatement: Node,
        patterns: Record<string, Pattern>,
        resource: string
    ) {
        const importNodes = importStatement.findAll(n => n.name === "import-name" || n.name === "import-alias");

        for (const node of importNodes) {
            const isAlias = node.name === "import-alias";

            if (!isAlias && node.parent?.name === "import-alias") {
                // This name belongs to an alias and is handled with its "import-alias" node.
                continue;
            }

            const importName = isAlias
                ? (node.find(n => n.name === "import-name") as Node).value
                : node.value;
            const localName = isAlias
                ? (node.find(n => n.name === "import-name-alias") as Node).value
                : importName;

            if (parseContext.importedPatternsByName.has(localName)) {
                throw new Error(`'${localName}' was already used within another import.`);
            }

            const pattern = patterns[importName];

            if (pattern == null) {
                throw new Error(`Couldn't find pattern with name: ${importName}, from import: ${resource}.`);
            }

            // Aliased imports are cloned under the alias; plain imports are stored as-is.
            parseContext.importedPatternsByName.set(localName, isAlias ? pattern.clone(localName) : pattern);
        }
    }

    /** Extracts a printable message from whatever value was thrown. */
    private static _describeError(error: unknown): string {
        return error instanceof Error ? error.message : String(error);
    }

    /**
     * Handles a param declared with a default value: unless the caller supplied
     * the param, its name is bound to the imported pattern called like the
     * default, or to a `Reference` to that name when no such import exists.
     */
    private _processUseParams(paramName: Node) {
        const defaultValueNode = paramName.find(n => n.name === "param-default");

        if (defaultValueNode == null) {
            return;
        }

        const nameNode = paramName.find(n => n.name === "param-name");
        const defaultNameNode = defaultValueNode.find(n => n.name === "default-param-name");

        if (nameNode == null || defaultNameNode == null) {
            return;
        }

        const name = nameNode.value;
        const defaultName = defaultNameNode.value;

        // A param supplied by the caller wins over the declared default.
        if (this._parseContext.paramsByName.has(name)) {
            return;
        }

        const pattern = this._parseContext.importedPatternsByName.get(defaultName) ?? new Reference(defaultName);

        this._parseContext.importedPatternsByName.set(name, pattern);
    }

    /**
     * Applies the decorators written directly above an assign statement.
     *
     * Walks the previous siblings of the statement's parent until a sibling that
     * contains another assign statement is reached, keeps those that contain a
     * decorator node, and invokes each decorator registered under that name.
     * Unknown decorator names are ignored.
     */
    private _applyDecorators(statementNode: Node, pattern: Pattern) {
        const decorators = this._parseContext.decorators;
        const bodyLine = statementNode.parent;

        if (bodyLine == null) {
            return;
        }

        let prevSibling = bodyLine.previousSibling();
        let decoratorNodes: Node[] = [];

        while (prevSibling != null) {
            if (prevSibling.find(n => n.name === "assign-statement")) {
                break;
            }

            decoratorNodes.push(prevSibling);
            prevSibling = prevSibling.previousSibling();
        }

        decoratorNodes = decoratorNodes.filter(n => n.find(c => c.name.includes("decorator")) != null);

        decoratorNodes.forEach((d) => {
            const nameNode = d.find(n => n.name === "decorator-name");

            if (nameNode == null || decorators[nameNode.value] == null) {
                return;
            }

            const nameDecorator = d.find(n => n.name === "name-decorator");

            if (nameDecorator != null) {
                decorators[nameNode.value](pattern);
                return;
            }

            const methodDecorator = d.find(n => n.name === "method-decorator");

            if (methodDecorator == null) {
                return;
            }

            // Space nodes are removed so the argument sits at a fixed child index.
            methodDecorator.findAll(n => n.name.includes("space")).forEach(n => n.remove());
            const argsNode = methodDecorator.children[3];

            if (argsNode == null || argsNode.name === "close-paren") {
                decorators[nameNode.value](pattern);
            } else {
                // The argument text is parsed as JSON; invalid JSON makes JSON.parse throw.
                decorators[nameNode.value](pattern, JSON.parse(argsNode.value));
            }
        });
    }

    /**
     * Builds the params passed to an imported grammar from the import
     * statement's "with-params-statement", if it has one.
     *
     * @returns The patterns defined in the params body, or an empty array.
     */
    private _getParams(importStatement: Node) {
        let params: Pattern[] = [];
        const paramsStatement = importStatement.find(n => n.name === "with-params-statement");

        if (paramsStatement != null) {
            const statements = paramsStatement.find(n => n.name === "body");

            if (statements != null) {
                const expression = statements.toString();
                const importedValues = Array.from(this
                    ._parseContext
                    .importedPatternsByName
                    .values()
                );

                const grammar = new Grammar({
                    params: [...importedValues, ...this._parseContext.paramsByName.values()],
                    originResource: this._originResource,
                    resolveImport: this._resolveImport,
                    // parseString below resolves imports synchronously, so it needs the sync resolver.
                    resolveImportSync: this._resolveImportSync,
                    decorators: this._parseContext.decorators
                });

                const patterns = grammar.parseString(expression);
                params = Object.values(patterns);
            }
        }

        return params;
    }

    /**
     * Looks a name up in the defined patterns, then the imported patterns, then
     * the params; falls back to a `Reference` to that name.
     */
    private _getPattern(name: string): Pattern {
        const { patternsByName, importedPatternsByName, paramsByName } = this._parseContext;

        return patternsByName.get(name)
            ?? importedPatternsByName.get(name)
            ?? paramsByName.get(name)
            ?? new Reference(name);
    }

    /** Registers a clone of the aliased pattern under the statement's name. */
    private _saveAlias(statementNode: Node) {
        const nameNode = statementNode.find(n => n.name === "name") as Node;
        const aliasNode = statementNode.find(n => n.name === "alias-literal") as Node;
        const aliasName = aliasNode.value;
        const name = nameNode.value;

        // Cloning under the new name also covers an alias that points at a reference.
        const alias = this._getPattern(aliasName).clone(name);

        this._applyDecorators(statementNode, alias);
        this._parseContext.patternsByName.set(name, alias);
    }

    /** Convenience wrapper: creates a Grammar with `options` and calls `parse`. */
    static parse(expression: string, options?: GrammarOptions): Promise<Record<string, Pattern>> {
        const grammar = new Grammar(options);
        return grammar.parse(expression);
    }

    /** Convenience wrapper: creates a Grammar with `options` and calls `import`. */
    static import(path: string, options?: GrammarOptions): Promise<Record<string, Pattern>> {
        const grammar = new Grammar(options);
        return grammar.import(path);
    }

    /** Convenience wrapper: creates a Grammar with `options` and calls `parseString`. */
    static parseString(expression: string, options?: GrammarOptions): Record<string, Pattern> {
        const grammar = new Grammar(options);
        return grammar.parseString(expression);
    }
}