UNPKG

antlr-ng

Version:

Next generation ANTLR Tool

499 lines (498 loc) 17.8 kB
const __defProp = Object.defineProperty;
const __name = (target, value) => __defProp(target, "name", { value, configurable: true });

import { ATNSerializer, CharStream, CommonTokenStream } from "antlr4ng";
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { basename, dirname, isAbsolute, join } from "node:path";
import { ANTLRv4Parser } from "./generated/ANTLRv4Parser.js";
import { ClassFactory } from "./ClassFactory.js";
import { UndefChecker } from "./UndefChecker.js";
import { AnalysisPipeline } from "./analysis/AnalysisPipeline.js";
import { LexerATNFactory } from "./automata/LexerATNFactory.js";
import { ParserATNFactory } from "./automata/ParserATNFactory.js";
import { CodeGenPipeline } from "./codegen/CodeGenPipeline.js";
import { CodeGenerator } from "./codegen/CodeGenerator.js";
import { Graph } from "./misc/Graph.js";
import { ToolANTLRLexer } from "./parse/ToolANTLRLexer.js";
import { ToolANTLRParser } from "./parse/ToolANTLRParser.js";
import { SemanticPipeline } from "./semantics/SemanticPipeline.js";
import { GrammarType } from "./support/GrammarType.js";
import { LogManager } from "./support/LogManager.js";
import { ParseTreeToASTConverter } from "./support/ParseTreeToASTConverter.js";
import { convertArrayToString } from "./support/helpers.js";
import { parseToolParameters } from "./tool-parameters.js";
import { BuildDependencyGenerator } from "./tool/BuildDependencyGenerator.js";
import { DOTGenerator } from "./tool/DOTGenerator.js";
import { ErrorManager } from "./tool/ErrorManager.js";
import { GrammarTransformPipeline } from "./tool/GrammarTransformPipeline.js";
import { IssueCode } from "./tool/Issues.js";
import { Rule } from "./tool/Rule.js";
import { Constants } from "./Constants.js";

/**
 * Entry point of the tool: parses grammar files, builds grammar objects from them, runs the semantic, ATN
 * construction, analysis and code generation steps and writes the resulting files.
 */
class Tool {
    static {
        __name(this, "Tool");
    }

    logMgr = new LogManager();
    errorManager;

    /** Parameters in effect for this run. Replaced by the parsed arguments when the constructor receives any. */
    toolParameters = {
        args: [],
        encoding: "utf-8"
    };

    /** The grammar files to process (taken from `toolParameters.args`). */
    grammarFiles = [];

    /** Cache of grammars loaded through `loadImportedGrammar`, keyed by grammar name. */
    importedGrammars = /* @__PURE__ */ new Map();

    /**
     * @param args Optional list of tool arguments. If given, it is handed to `parseToolParameters` and the result
     *             replaces the default parameters.
     */
    constructor(args) {
        if (args) {
            this.toolParameters = parseToolParameters(args);
        }

        this.grammarFiles = this.toolParameters.args;
        this.errorManager = new ErrorManager(
            this.toolParameters.msgFormat,
            this.toolParameters.longMessages,
            this.toolParameters.warningsAreErrors
        );

        // Arguments were passed, but none of them ended up as a grammar file.
        if (args && args.length > 0 && this.grammarFiles.length === 0) {
            this.errorManager.toolError(IssueCode.NoGrammarsFound);
        }
    }

    /**
     * Runs the tool for the given arguments and terminates via `exit()`, with code 1 if processing threw
     * and 0 otherwise.
     *
     * @param args The tool arguments.
     */
    static main(args) {
        const antlr = new Tool(args);

        // The exit code is only recorded here. Exiting from inside the catch clause would end the process
        // before the log file below could be written.
        let exitCode = 0;
        try {
            antlr.processGrammarsOnCommandLine();
        } catch {
            // The caught error is deliberately not printed here (same as before this change).
            exitCode = 1;
        }

        if (antlr.toolParameters.log) {
            try {
                const logName = antlr.logMgr.save();
                console.log("wrote " + logName);
            } catch (ioe) {
                antlr.errorManager.toolError(IssueCode.InternalError, ioe);
            }
        }

        antlr.exit(exitCode);
    }

    /**
     * Creates the text content of the `.interp` file for a grammar: token literal names, token symbolic names,
     * rule names, (for lexers) channel and mode names, followed by the serialized ATN.
     *
     * @param g The grammar to describe. Its `atn` must have been created already.
     *
     * @returns The content of the interpreter data file.
     */
    static generateInterpreterData(g) {
        // One entry per line, nullish entries are written as "null".
        const listNames = (names) => {
            return names.reduce((previousValue, currentValue) => {
                return previousValue + (currentValue ?? "null") + "\n";
            }, "");
        };

        let content = "";

        content += "token literal names:\n";
        content += listNames(g.getTokenLiteralNames()) + "\n";

        content += "token symbolic names:\n";
        content += listNames(g.getTokenSymbolicNames()) + "\n";

        content += "rule names:\n";
        content += listNames(g.getRuleNames()) + "\n";

        if (g.isLexer()) {
            content += "channel names:\n";
            content += "DEFAULT_TOKEN_CHANNEL\n";
            content += "HIDDEN\n";
            content += g.channelValueToNameList.join("\n") + "\n";
            content += "mode names:\n";
            content += [...g.modes.keys()].join("\n") + "\n";
        }
        content += "\n";

        const serializedATN = ATNSerializer.getSerialized(g.atn);
        content += "atn:\n";
        content += convertArrayToString(serializedATN);

        return content;
    }

    /**
     * Manually get option node from tree.
     *
     * @param root The root of the grammar tree.
     * @param option The name of the option to find.
     *
     * @returns The option node or null if not found.
     */
    static findOptionValueAST(root, option) {
        const options = root.getFirstChildWithType(ANTLRv4Parser.OPTIONS);
        if (!options || options.children.length === 0) {
            return null;
        }

        for (const c of options.children) {
            // An option is an ASSIGN node with the option name as first child and its value as second child.
            if (c.getType() === ANTLRv4Parser.ASSIGN && c.children[0].getText() === option) {
                return c.children[1];
            }
        }

        return null;
    }

    /**
     * Processes all grammar files of this tool instance, in the order computed by `sortGrammarByTokenVocab`.
     * With `generateDependencies` set only the dependencies of each grammar are printed. Otherwise a grammar is
     * processed (including code generation) as long as no error has been recorded so far.
     */
    processGrammarsOnCommandLine() {
        const sortedGrammars = this.sortGrammarByTokenVocab(this.grammarFiles);
        for (const t of sortedGrammars) {
            const g = this.createGrammar(t);
            g.fileName = t.fileName;

            if (this.toolParameters.generateDependencies) {
                const dep = new BuildDependencyGenerator(this, g);
                console.log(dep.getDependencies().render());
            } else if (this.errorManager.errors === 0) {
                this.process(g, true);
            }
        }
    }

    /**
     * To process a grammar, we load all of its imported grammars into subordinate grammar objects. Then we merge the
     * imported rules into the root grammar. If a root grammar is a combined grammar, we have to extract the implicit
     * lexer. Once all this is done, we process the lexer first, if present, and then the parser grammar
     *
     * @param g The grammar to process.
     * @param genCode Whether to generate code or not.
     */
    process(g, genCode) {
        g.loadImportedGrammars(/* @__PURE__ */ new Set());

        const transform = new GrammarTransformPipeline(g, this);
        transform.process();

        if (g.ast.grammarType === GrammarType.Combined) {
            const lexerAST = transform.extractImplicitLexer(g);
            if (lexerAST) {
                lexerAST.toolParameters = this.toolParameters;

                const lexerGrammar = ClassFactory.createLexerGrammar(this, lexerAST);
                lexerGrammar.fileName = g.fileName;
                lexerGrammar.originalGrammar = g;
                g.implicitLexer = lexerGrammar;
                lexerGrammar.implicitLexerOwner = g;

                this.processNonCombinedGrammar(lexerGrammar, genCode);
            }
        }

        if (g.implicitLexer) {
            g.importVocab(g.implicitLexer);
        }

        this.processNonCombinedGrammar(g, genCode);
    }

    /**
     * Runs the individual processing steps for a lexer or parser grammar: rule checks, semantic pipeline, ATN
     * creation, optional DOT export, interpreter data file, analysis and finally code generation. Processing stops
     * early when a step finds rule issues or records new errors.
     *
     * @param g The grammar to process.
     * @param genCode Whether to write the interpreter data file and to generate code.
     */
    processNonCombinedGrammar(g, genCode) {
        const ruleFail = this.checkForRuleIssues(g);
        if (ruleFail) {
            return;
        }

        // Errors recorded before this grammar was touched. Used to detect new errors below.
        const prevErrors = this.errorManager.errors;

        const sem = new SemanticPipeline(g);
        sem.process();
        if (this.errorManager.errors > prevErrors) {
            return;
        }

        const codeGenerator = new CodeGenerator(g);

        let factory;
        if (g.isLexer()) {
            factory = new LexerATNFactory(g, codeGenerator);
        } else {
            factory = new ParserATNFactory(g);
        }
        g.atn = factory.createATN();

        if (this.toolParameters.atn) {
            this.exportATNDotFiles(g);
        }

        if (genCode && g.tool.getNumErrors() === 0) {
            const interpFile = Tool.generateInterpreterData(g);
            try {
                const fileName = this.getOutputFile(g, g.name + ".interp");
                writeFileSync(fileName, interpFile);
            } catch (ioe) {
                this.errorManager.toolError(IssueCode.CannotWriteFile, ioe);
            }
        }

        const anal = new AnalysisPipeline(g);
        anal.process();
        if (g.tool.getNumErrors() > prevErrors) {
            return;
        }

        if (genCode) {
            const gen = new CodeGenPipeline(
                g,
                codeGenerator,
                this.toolParameters.generateListener,
                this.toolParameters.generateVisitor
            );
            gen.process(this.toolParameters);
        }
    }

    /**
     * Important enough to avoid multiple definitions that we do very early, right after AST construction. Also check
     * for undefined rules in parser/lexer to avoid exceptions later. Return true if we find multiple definitions of
     * the same rule or a reference to an undefined rule or parser rule ref in lexer rule.
     *
     * @param g The grammar to check.
     *
     * @returns true if there are issues with the rules.
     */
    checkForRuleIssues(g) {
        // Collect the rules of the RULES section plus those of all MODE sections.
        const rulesNode = g.ast.getFirstChildWithType(ANTLRv4Parser.RULES);
        const rules = [...rulesNode.getAllChildrenWithType(ANTLRv4Parser.RULE)];
        for (const mode of g.ast.getAllChildrenWithType(ANTLRv4Parser.MODE)) {
            rules.push(...mode.getAllChildrenWithType(ANTLRv4Parser.RULE));
        }

        let redefinition = false;
        const ruleToAST = /* @__PURE__ */ new Map();
        for (const ruleAST of rules) {
            // The first child of a rule node carries the rule name.
            const id = ruleAST.children[0];
            const ruleName = id.getText();

            const prev = ruleToAST.get(ruleName);
            if (prev) {
                const prevChild = prev.children[0];
                this.errorManager.grammarError(
                    IssueCode.RuleRedefinition,
                    g.fileName,
                    id.token,
                    ruleName,
                    prevChild.token.line
                );
                redefinition = true;

                // The first definition stays registered.
                continue;
            }

            ruleToAST.set(ruleName, ruleAST);
        }

        const chk = new UndefChecker(g.isLexer(), ruleToAST, this.errorManager);
        chk.visitGrammar(g.ast);

        return redefinition || chk.badRef;
    }

    /**
     * Parses the given grammar files and orders the resulting ASTs using a graph that has an edge from each grammar
     * to the grammar named in its `tokenVocab` option. Files that do not produce an AST are left out.
     *
     * @param fileNames The grammar files to parse.
     *
     * @returns The grammar ASTs, in the order delivered by the graph sort.
     */
    sortGrammarByTokenVocab(fileNames) {
        const g = new Graph();
        const roots = [];

        for (const fileName of fileNames) {
            const root = this.parseGrammar(fileName);
            if (!root) {
                continue;
            }

            roots.push(root);
            root.fileName = fileName;
            const grammarName = root.getGrammarName();

            const tokenVocabNode = Tool.findOptionValueAST(root, "tokenVocab");
            if (tokenVocabNode) {
                let vocabName = tokenVocabNode.getText();

                // Strip the quotes from a quoted vocabulary name.
                const len = vocabName.length;
                const firstChar = vocabName.charAt(0);
                const lastChar = vocabName.charAt(len - 1);
                if (len >= 2 && firstChar === "'" && lastChar === "'") {
                    vocabName = vocabName.substring(1, len - 1);
                }

                // Only the part after the last slash is used as name.
                const lastSlash = vocabName.lastIndexOf("/");
                if (lastSlash >= 0) {
                    vocabName = vocabName.substring(lastSlash + 1);
                }

                g.addEdge(grammarName, vocabName);
            }

            // Self edge, so the grammar is part of the graph even without a tokenVocab option.
            g.addEdge(grammarName, grammarName);
        }

        const sortedGrammarNames = g.sort();

        // Names for which no file was parsed here (e.g. a vocabulary that is not among the inputs) are skipped.
        const sortedRoots = [];
        for (const grammarName of sortedGrammarNames) {
            const match = roots.find((root) => {
                return root.getGrammarName() === grammarName;
            });

            if (match) {
                sortedRoots.push(match);
            }
        }

        return sortedRoots;
    }

    /**
     * Given the raw AST of a grammar, create a grammar object associated with the AST. Once we have the grammar object,
     * ensure that all nodes in tree referred to this grammar. Later, we will use it for error handling and generally
     * knowing from where a rule comes from.
     *
     * @param grammarAST The raw AST of the grammar.
     *
     * @returns The grammar object.
     */
    createGrammar(grammarAST) {
        let g;
        if (grammarAST.grammarType === GrammarType.Lexer) {
            g = ClassFactory.createLexerGrammar(this, grammarAST);
        } else {
            g = ClassFactory.createGrammar(this, grammarAST);
        }

        GrammarTransformPipeline.setGrammarPtr(g, grammarAST);

        return g;
    }

    /**
     * Reads and parses a grammar file.
     *
     * @param fileName The grammar file to parse.
     *
     * @returns The grammar AST, or undefined if the parser found syntax errors.
     *
     * @throws Any error raised while reading or parsing. It is reported as `CannotOpenFile` before being rethrown.
     */
    parseGrammar(fileName) {
        try {
            const encoding = this.toolParameters.encoding ?? "utf-8";
            const content = readFileSync(fileName, { encoding });

            const input = CharStream.fromString(content);
            input.name = basename(fileName);

            return this.parse(input);
        } catch (ioe) {
            this.errorManager.toolError(IssueCode.CannotOpenFile, ioe, fileName);
            throw ioe;
        }
    }

    /**
     * Convenience method to load and process an ANTLR grammar. Useful when creating interpreters. If you need to
     * access to the lexer grammar created while processing a combined grammar, use getImplicitLexer() on returned
     * grammar.
     *
     * @param fileName The name of the grammar file to load.
     *
     * @returns The grammar object.
     */
    loadGrammar(fileName) {
        // NOTE(review): parseGrammar yields undefined for a grammar with syntax errors. createGrammar reads
        // `grammarAST.grammarType` and hence throws in that case.
        const grammarAST = this.parseGrammar(fileName);
        const g = this.createGrammar(grammarAST);
        g.fileName = fileName;

        return g;
    }

    /**
     * Try current dir then dir of g then lib dir.
     *
     * @param g The grammar to import.
     * @param nameNode The node associated with the imported grammar name.
     *
     * @returns The imported grammar or null if not found.
     */
    loadImportedGrammar(g, nameNode) {
        const name = nameNode.getText();

        let imported = this.importedGrammars.get(name);
        if (!imported) {
            g.tool.logInfo({ component: "grammar", msg: `load ${name} from ${g.fileName}` });

            // Try each known grammar file extension until a file is found.
            let importedFile;
            for (const extension of Constants.AllGrammarExtensions) {
                importedFile = this.getImportedGrammarFile(g, name + extension);
                if (importedFile) {
                    break;
                }
            }

            if (!importedFile) {
                this.errorManager.grammarError(IssueCode.CannotFindImportedGrammar, g.fileName, nameNode.token, name);

                return null;
            }

            // Same fallback as in parseGrammar. Without an encoding readFileSync returns a Buffer, not a string.
            const grammarEncoding = this.toolParameters.encoding ?? "utf-8";
            const content = readFileSync(importedFile, { encoding: grammarEncoding });

            const input = CharStream.fromString(content);
            input.name = basename(importedFile);

            const result = this.parse(input);
            if (!result) {
                return null;
            }

            imported = this.createGrammar(result);
            imported.fileName = importedFile;

            // The cache key is the name declared in the grammar, while the lookup above uses the import name.
            this.importedGrammars.set(result.getGrammarName(), imported);
        }

        return imported;
    }

    /**
     * Parses grammar text given as string.
     *
     * @param grammar The grammar source text.
     *
     * @returns The grammar AST, or undefined if the parser found syntax errors.
     */
    parseGrammarFromString(grammar) {
        return this.parse(CharStream.fromString(grammar));
    }

    /**
     * Runs the tool lexer and parser on the given input and converts the parse tree to a grammar AST.
     *
     * @param input The character stream with the grammar text.
     *
     * @returns The grammar AST (with the current tool parameters attached), or undefined if the parser found
     *          syntax errors.
     */
    parse(input) {
        const lexer = new ToolANTLRLexer(input, this);
        const tokens = new CommonTokenStream(lexer);
        const p = new ToolANTLRParser(tokens, this);

        const grammarSpec = p.grammarSpec();
        if (p.numberOfSyntaxErrors > 0) {
            return undefined;
        }

        const result = ParseTreeToASTConverter.convertGrammarSpecToAST(grammarSpec, tokens);
        result.toolParameters = this.toolParameters;

        return result;
    }

    /**
     * Writes one DOT file per rule of the given grammar and of all grammars it imports.
     *
     * @param g The grammar whose ATN is exported.
     *
     * @throws Any error raised while generating or writing a file. It is reported as `CannotWriteFile` before
     *         being rethrown.
     */
    exportATNDotFiles(g) {
        const dotGenerator = new DOTGenerator(g);

        const grammars = [g, ...g.getAllImportedGrammars()];
        for (const ig of grammars) {
            for (const r of ig.rules.values()) {
                try {
                    // The start state is always looked up in the ATN of `g`, also for rules of imported grammars.
                    const dot = dotGenerator.getDOTFromState(g.atn.ruleToStartState[r.index], g.isLexer());
                    this.writeDOTFile(g, r, dot);
                } catch (ioe) {
                    this.errorManager.toolError(IssueCode.CannotWriteFile, ioe);
                    throw ioe;
                }
            }
        }
    }

    /**
     * Determines the full path for an output file of the given grammar. The directory is computed by
     * `getOutputDirectory` from the grammar's file name and is created (including missing parents) if it does not
     * exist yet.
     *
     * @param g The grammar for which we are generating a file.
     * @param fileName The name of the file to generate.
     *
     * @returns The full path to the output file.
     */
    getOutputFile(g, fileName) {
        const outputDir = this.getOutputDirectory(g.fileName);
        const outputFile = join(outputDir, fileName);

        if (!existsSync(outputDir)) {
            mkdirSync(outputDir, { recursive: true });
        }

        return outputFile;
    }

    /**
     * Searches for the file of an imported grammar. The name is tried as given (i.e. relative to the current
     * working directory, unless absolute), then in the directory of the importing grammar and finally in the
     * `lib` directory, if one is set.
     *
     * @param g The importing grammar.
     * @param fileName The file name (including extension) of the imported grammar.
     *
     * @returns The path of the first existing candidate, or undefined if none of them exists.
     */
    getImportedGrammarFile(g, fileName) {
        if (existsSync(fileName)) {
            return fileName;
        }

        const besideGrammar = join(dirname(g.fileName), fileName);
        if (existsSync(besideGrammar)) {
            return besideGrammar;
        }

        const libDirectory = this.toolParameters.lib;
        if (libDirectory) {
            const inLibrary = join(libDirectory, fileName);
            if (existsSync(inLibrary)) {
                return inLibrary;
            }
        }

        // Nothing found. Never hand out a path that does not exist, so callers can go on with the next
        // extension or report the missing import.
        return undefined;
    }

    /**
     * Determines the directory in which output files for the given input file are generated:
     * - With `exactOutputDir` and an output directory set, the output directory is used as is.
     * - An absolute output directory is used as is.
     * - A relative output directory is appended to the directory of the input file.
     * - Without an output directory the directory of the input file is used.
     *
     * @param fileNameWithPath path to input source.
     *
     * @returns The directory for the output files.
     */
    getOutputDirectory(fileNameWithPath) {
        const dirName = dirname(fileNameWithPath);
        const outputDirectory = this.toolParameters.outputDirectory;

        if (!outputDirectory) {
            return dirName;
        }

        if (this.toolParameters.exactOutputDir || isAbsolute(outputDirectory)) {
            return outputDirectory;
        }

        return join(dirName, outputDirectory);
    }

    /**
     * Forwards the given entry to the log manager.
     *
     * @param info The entry to log.
     */
    logInfo(info) {
        this.logMgr.log(info);
    }

    /** @returns The number of errors recorded by the error manager. */
    getNumErrors() {
        return this.errorManager.errors;
    }

    /**
     * Ends the process.
     *
     * @param e The exit code.
     */
    exit(e) {
        process.exit(e);
    }

    /** Throws an error with the message "ANTLR panic". */
    panic() {
        throw new Error("ANTLR panic");
    }

    /**
     * Writes DOT content to a `.dot` file in the output directory of the given grammar.
     *
     * @param g The grammar which determines the output directory.
     * @param rulOrName Either a rule (the file is then named `<grammar name>.<rule name>.dot`) or the base name of
     *                  the file.
     * @param dot The content to write.
     */
    writeDOTFile(g, rulOrName, dot) {
        const name = rulOrName instanceof Rule ? rulOrName.g.name + "." + rulOrName.name : rulOrName;
        const fileName = this.getOutputFile(g, name + ".dot");
        writeFileSync(fileName, dot);
    }

    static {
        // Registers the tool factory with the ClassFactory.
        ClassFactory.createTool = () => {
            return new Tool();
        };
    }
}

export { Tool };