parsergen-starter
Version:
A complete parser generator starter with PEG.js, optional Moo lexer, and VS Code integration
678 lines (599 loc) • 22.2 kB
text/typescript
import fs from 'node:fs/promises';
import { readFileSync } from 'node:fs';
import { watchFile } from 'node:fs';
import { argv } from 'node:process';
import { resolve } from 'node:path';
import {
compileGrammarFromFile,
validateGrammar,
analyzeGrammarAdvanced,
CompiledGrammar,
} from '../grammar/index.js';
import { parseInput, ParserUtils } from '../parser/index.js';
import { formatError, formatCompilationError } from '../utils/index.js';
// Define the valid format types with const assertion for type safety
const VALID_FORMATS = ['bare', 'commonjs', 'es', 'globals', 'umd'] as const;
type OutputFormat = typeof VALID_FORMATS[number];
// Tool version (can be dynamically loaded from package.json in a real app)
const TOOL_VERSION = '0.1.0';
// Color support detection
const supportsColor = process.stdout.isTTY && process.env.NO_COLOR !== '1';
const colors = {
red: supportsColor ? '\x1b[31m' : '',
green: supportsColor ? '\x1b[32m' : '',
yellow: supportsColor ? '\x1b[33m' : '',
blue: supportsColor ? '\x1b[34m' : '',
magenta: supportsColor ? '\x1b[35m' : '',
cyan: supportsColor ? '\x1b[36m' : '',
reset: supportsColor ? '\x1b[0m' : '',
bold: supportsColor ? '\x1b[1m' : '',
dim: supportsColor ? '\x1b[2m' : '',
};
// Enhanced logging utilities
const log = {
info: (msg: string) => console.log(`${colors.blue}ℹ️${colors.reset} ${msg}`),
success: (msg: string) => console.log(`${colors.green}✅${colors.reset} ${msg}`),
error: (msg: string) => console.error(`${colors.red}❌${colors.reset} ${msg}`),
warn: (msg: string) => console.warn(`${colors.yellow}⚠️${colors.reset} ${msg}`),
debug: (msg: string) => console.log(`${colors.dim}🐛 ${msg}${colors.reset}`),
watch: (msg: string) => console.log(`${colors.cyan}👀${colors.reset} ${msg}`),
build: (msg: string) => console.log(`${colors.magenta}🔧${colors.reset} ${msg}`),
analyze: (msg: string) => console.log(`${colors.yellow}📊${colors.reset} ${msg}`),
};
// Configuration interface
interface CLIConfig {
grammarPath?: string; // Made optional as init/version don't require it
testInput?: string;
outFile?: string;
format: OutputFormat;
validate: boolean;
analyze: boolean;
ast: boolean;
watch: boolean;
verbose: boolean;
interactive: boolean;
benchmark: boolean;
help: boolean;
initProject: boolean;
version: boolean;
transformScript?: string; // New: Path to AST transformation script
codegenScript?: string; // New: Path to code generation script
}
function printHelp() {
console.log(`
${colors.bold}parsergen${colors.reset} - Advanced PEG Grammar Parser Generator
${colors.bold}USAGE:${colors.reset}
parsergen <grammar.peg> [options]
parsergen --init [options]
parsergen --version
${colors.bold}COMMANDS:${colors.reset}
${colors.green}--init${colors.reset} Initialize a new parser project with default files.
${colors.green}--version${colors.reset} Display the current version of parsergen.
${colors.bold}OPTIONS:${colors.reset}
${colors.green}--test <input>${colors.reset} Test grammar by parsing input string
${colors.green}--test-file <file>${colors.reset} Test grammar by parsing file content
${colors.green}--validate${colors.reset} Only validate grammar (no parsing)
${colors.green}--analyze${colors.reset} Show detailed grammar metadata
${colors.green}--out <file>${colors.reset} Output compiled parser as JS
${colors.green}--format <target>${colors.reset} Format: ${VALID_FORMATS.join(' | ')} (default: es)
${colors.green}--ast${colors.reset} Print parse AST in JSON format
${colors.green}--transform <script.js>${colors.reset} Apply a JS transformation script to the AST.
${colors.green}--codegen <script.js>${colors.reset} Apply a JS code generation script to the AST.
${colors.green}--watch${colors.reset} Watch grammar file and auto-recompile
${colors.green}--verbose, -v${colors.reset} Enable verbose output
${colors.green}--interactive, -i${colors.reset} Interactive mode for testing
${colors.green}--benchmark${colors.reset} Benchmark parsing performance
${colors.green}--no-color${colors.reset} Disable colored output
${colors.green}--help, -h${colors.reset} Show this help
${colors.bold}EXAMPLES:${colors.reset}
parsergen grammar.peg --test "hello world"
parsergen grammar.peg --out parser.js --format commonjs --watch
parsergen grammar.peg --analyze --verbose
parsergen grammar.peg --interactive
parsergen mylang.peg --test "input" --transform ast-transform.js --codegen generate-js.js
parsergen --init
parsergen --version
${colors.bold}FORMATS:${colors.reset}
${colors.cyan}bare${colors.reset} - Bare parser function
${colors.cyan}commonjs${colors.reset} - CommonJS module
${colors.cyan}es${colors.reset} - ES6 module (default)
${colors.cyan}globals${colors.reset} - Global variable
${colors.cyan}umd${colors.reset} - Universal Module Definition
`);
}
function parseArgs(args: string[]): CLIConfig {
const config: CLIConfig = {
grammarPath: args[0] && !args[0].startsWith('--') ? args[0] : undefined,
format: 'es',
validate: false,
analyze: false,
ast: false,
watch: false,
verbose: false,
interactive: false,
benchmark: false,
help: false,
initProject: false,
version: false,
};
// Check for commands that don't require a grammar file first
if (args.includes('--help') || args.includes('-h')) {
config.help = true;
}
if (args.includes('--version')) {
config.version = true;
}
if (args.includes('--init')) {
config.initProject = true;
}
// Parse other options
for (let i = 0; i < args.length; i++) {
const arg = args[i];
const nextArg = args[i + 1];
switch (arg) {
case '--test':
if (nextArg) {
config.testInput = nextArg;
i++;
}
break;
case '--test-file':
if (nextArg) {
try {
config.testInput = readFileSync(nextArg, 'utf-8');
i++;
} catch {
log.error(`Could not read test file: ${nextArg}`);
process.exit(1);
}
}
break;
case '--out':
if (nextArg) {
config.outFile = nextArg;
i++;
}
break;
case '--format':
if (nextArg && isValidFormat(nextArg)) {
config.format = nextArg;
i++;
} else {
log.error(`Invalid format: ${nextArg}. Valid formats: ${VALID_FORMATS.join(', ')}`);
process.exit(1);
}
break;
case '--transform': // New option parsing
if (nextArg) {
config.transformScript = nextArg;
i++;
}
break;
case '--codegen': // New option parsing
if (nextArg) {
config.codegenScript = nextArg;
i++;
}
break;
case '--validate':
config.validate = true;
break;
case '--analyze':
config.analyze = true;
break;
case '--ast':
config.ast = true;
break;
case '--watch':
config.watch = true;
break;
case '--verbose':
case '-v':
config.verbose = true;
break;
case '--interactive':
case '-i':
config.interactive = true;
break;
case '--benchmark':
config.benchmark = true;
break;
// --help, --version, --init are handled above
}
}
// If a grammar path was provided as the first argument, ensure it's captured
if (args.length > 0 && !args[0].startsWith('--') && !config.grammarPath) {
config.grammarPath = args[0];
}
return config;
}
function isValidFormat(format: string): format is OutputFormat {
return VALID_FORMATS.includes(format as OutputFormat);
}
// Use the directly imported CompiledGrammar type
async function benchmarkParsing(parser: CompiledGrammar<unknown>, input: string, iterations: number = 1000) {
log.build(`Running benchmark with ${iterations} iterations...`);
const start = performance.now();
let successCount = 0;
let errorCount = 0;
for (let i = 0; i < iterations; i++) {
try {
// Use the directly imported parseInput and ParserUtils
const result = parseInput(parser, input);
if (!ParserUtils.isParseError(result)) {
successCount++;
} else {
errorCount++;
}
} catch {
errorCount++;
}
}
const end = performance.now();
const totalTime = end - start;
const avgTime = totalTime / iterations;
console.log(`
${colors.bold}BENCHMARK RESULTS:${colors.reset}
Total time: ${totalTime.toFixed(2)}ms
Average per parse: ${avgTime.toFixed(4)}ms
Successful parses: ${colors.green}${successCount}${colors.reset}
Failed parses: ${colors.red}${errorCount}${colors.reset}
Throughput: ${((iterations / totalTime) * 1000).toFixed(2)} parses/second
`);
}
// Use the directly imported CompiledGrammar type
async function interactiveMode(parser: CompiledGrammar<unknown>, verbose: boolean) {
log.info('Entering interactive mode. Type "exit" to quit, "help" for commands.');
const readline = await import('node:readline');
const rl = readline.createInterface({
input: process.stdin,
output: process.stdout,
});
const askQuestion = (prompt: string): Promise<string> => {
return new Promise((resolve) => {
rl.question(prompt, resolve);
});
};
while (true) {
try {
const input = await askQuestion(`${colors.cyan}parser>${colors.reset} `);
if (input.toLowerCase() === 'exit') {
break;
}
if (input.toLowerCase() === 'help') {
console.log(`
Interactive Commands:
exit - Exit interactive mode
help - Show this help
benchmark <n> - Run benchmark with n iterations
ast on/off - Toggle AST output
<input> - Parse the input string
`);
continue;
}
if (input.startsWith('benchmark ')) {
const iterations = parseInt(input.split(' ')[1]) || 1000;
await benchmarkParsing(parser, 'test', iterations);
continue;
}
// Use the directly imported parseInput and ParserUtils
const result = parseInput(parser, input);
if (!ParserUtils.isParseError(result)) {
log.success('Parse successful');
if (verbose) {
console.log(JSON.stringify(result.result, null, 2));
}
} else {
log.error('Parse failed');
console.error(formatError(result));
}
} catch {
log.error(`Unexpected error occurred`);
}
}
rl.close();
}
async function compileAndWrite(grammarPath: string, outFile: string, format: OutputFormat, verbose: boolean = false) {
try {
const grammarText = await fs.readFile(grammarPath, 'utf-8');
if (verbose) {
log.debug(`Reading grammar from: ${grammarPath}`);
log.debug(`Output format: ${format}`);
}
// Import Peggy directly to generate source code
const PEG = await import('peggy');
// Create format-specific options for Peggy
const baseOptions = {
allowedStartRules: ['*'],
cache: false,
optimize: 'speed' as const,
output: 'source' as const,
trace: verbose,
};
let compiledSource: string;
// Handle each format with proper typing
switch (format) {
case 'bare':
compiledSource = PEG.generate(grammarText, {
...baseOptions,
format: 'bare' as const,
});
break;
case 'commonjs':
compiledSource = PEG.generate(grammarText, {
...baseOptions,
format: 'commonjs' as const,
});
break;
case 'es':
compiledSource = PEG.generate(grammarText, {
...baseOptions,
format: 'es' as const,
});
break;
case 'globals':
compiledSource = PEG.generate(grammarText, {
...baseOptions,
format: 'globals' as const,
exportVar: 'Parser',
});
break;
case 'umd':
compiledSource = PEG.generate(grammarText, {
...baseOptions,
format: 'umd' as const,
exportVar: 'Parser',
});
break;
default:
throw new Error(`Unsupported format: ${format}`);
}
await fs.writeFile(outFile, compiledSource, 'utf-8');
log.success(`Parser compiled: ${outFile}`);
if (verbose) {
const stats = await fs.stat(outFile);
log.debug(`Output file size: ${stats.size} bytes`);
}
} catch (error: unknown) {
log.error('Compilation failed');
if (verbose) {
console.error(error);
}
throw new Error('Compilation failed');
}
}
async function initializeProject() {
log.info('Initializing new parser project...');
const grammarFileName = 'grammar.peg';
const configFileName = '.parsergenrc';
const transformFileName = 'transform.js';
const codegenFileName = 'codegen.js';
const dummyGrammarContent = `// My PEG Grammar
start = "Hello" _ "World" { return { type: "Greeting", value: "Hello World" }; }
_ = [ \\t]+
`;
const dummyConfigContent = JSON.stringify({
grammarFile: grammarFileName,
outputFile: 'parser.js',
format: 'es',
verbose: true,
transformScript: transformFileName,
codegenScript: codegenFileName,
}, null, 2);
const dummyTransformContent = `
/**
* Transforms the parsed AST.
* @param {any} ast - The Abstract Syntax Tree parsed by the grammar.
* @returns {any} The transformed AST.
*/
export default function transform(ast) {
console.log('Applying AST transformation...');
// Example: Modify the AST
if (ast && ast.type === 'Greeting') {
return { ...ast, transformed: true, message: 'AST transformed successfully!' };
}
return ast;
}
`;
const dummyCodegenContent = `
/**
* Generates code from the (potentially transformed) AST.
* @param {any} ast - The Abstract Syntax Tree (or transformed AST).
* @returns {string} The generated code.
*/
export default function codegen(ast) {
console.log('Generating code from AST...');
if (ast && ast.type === 'Greeting' && ast.transformed) {
return \`console.log("Transformed greeting: \${ast.value} - \${ast.message}");\`;
} else if (ast && ast.type === 'Greeting') {
return \`console.log("Original greeting: \${ast.value}");\`;
}
return \`console.log("Could not generate code for unknown AST type.");\`;
}
`;
try {
await fs.writeFile(grammarFileName, dummyGrammarContent, 'utf-8');
log.success(`Created ${grammarFileName}`);
await fs.writeFile(configFileName, dummyConfigContent, 'utf-8');
log.success(`Created ${configFileName}`);
await fs.writeFile(transformFileName, dummyTransformContent, 'utf-8');
log.success(`Created ${transformFileName}`);
await fs.writeFile(codegenFileName, dummyCodegenContent, 'utf-8');
log.success(`Created ${codegenFileName}`);
log.info('Project initialized successfully!');
log.info(`You can now edit '${grammarFileName}', '${transformFileName}', '${codegenFileName}'`);
log.info(`Try: 'parsergen ${grammarFileName} --test "Hello World" --transform ${transformFileName} --codegen ${codegenFileName}'`);
} catch (error) {
log.error(`Failed to initialize project: ${error instanceof Error ? error.message : String(error)}`);
process.exit(1);
}
}
async function main() {
const config = parseArgs(argv.slice(2));
if (config.help) {
printHelp();
return;
}
if (config.version) {
log.info(`parsergen version: ${TOOL_VERSION}`);
return;
}
if (config.initProject) {
await initializeProject();
return;
}
// If no grammarPath is provided and it's not an init/version/help command, show help
if (!config.grammarPath) {
log.error('No grammar file specified.');
printHelp();
process.exit(1);
}
// Assert grammarPath is a string after the check, ensuring TypeScript knows its type
const grammarFilePath: string = config.grammarPath;
try {
// Check if grammar file exists
await fs.access(grammarFilePath);
} catch {
log.error(`Grammar file not found: ${grammarFilePath}`);
process.exit(1);
}
if (config.verbose) {
log.debug(`Using grammar file: ${grammarFilePath}`);
}
try {
const grammarText = await fs.readFile(grammarFilePath, 'utf-8');
// Validate grammar
if (config.validate) {
const result = validateGrammar(grammarText);
if (result.valid) {
log.success('Grammar is valid');
} else {
log.error(`Grammar validation failed:\n${result.error}`);
process.exit(1);
}
return;
}
// Analyze grammar
if (config.analyze) {
const metadata = analyzeGrammarAdvanced(grammarText);
log.analyze('Grammar Analysis:');
console.log(JSON.stringify(metadata, null, 2));
return;
}
// Watch mode
if (config.watch && config.outFile) {
log.watch(`Watching ${grammarFilePath} for changes...`);
await compileAndWrite(grammarFilePath, config.outFile, config.format, config.verbose);
watchFile(grammarFilePath, { interval: 300 }, async () => {
try {
log.build('Detected change, recompiling...');
await compileAndWrite(grammarFilePath, config.outFile!, config.format, config.verbose);
} catch (err: unknown) {
log.error('Rebuild failed');
if (config.verbose) {
console.error(formatCompilationError(err, grammarText));
}
}
});
// Keep process alive
process.on('SIGINT', () => {
log.info('Stopping watch mode...');
process.exit(0);
});
return;
}
// Compile to file
if (config.outFile) {
await compileAndWrite(grammarFilePath, config.outFile, config.format, config.verbose);
return;
}
// Default: compile grammar in memory
const parser = await compileGrammarFromFile(grammarFilePath);
log.success(`Grammar compiled: ${grammarFilePath}`);
// Interactive mode
if (config.interactive) {
await interactiveMode(parser, config.ast || config.verbose);
return;
}
// Test parsing
if (config.testInput) {
if (config.benchmark) {
await benchmarkParsing(parser, config.testInput);
return;
}
const result = parseInput(parser, config.testInput);
if (!ParserUtils.isParseError(result)) {
log.success('Parse successful');
let ast = result.result;
// Apply AST transformation if script is provided
if (config.transformScript) {
try {
const transformPath = resolve(process.cwd(), config.transformScript);
log.info(`Loading AST transformation script: ${transformPath}`);
const { default: transformFn } = await import(transformPath);
if (typeof transformFn === 'function') {
ast = transformFn(ast);
log.success('AST transformed.');
} else {
log.warn(`Transformation script '${config.transformScript}' does not export a default function.`);
}
} catch (transformErr) {
log.error(`Failed to apply AST transformation: ${transformErr instanceof Error ? transformErr.message : String(transformErr)}`);
process.exit(1);
}
}
// Print AST if requested (after transformation)
if (config.ast) {
log.info('Parsed AST (after transformation if applied):');
console.log(JSON.stringify(ast, null, 2));
}
// Apply code generation if script is provided
if (config.codegenScript) {
try {
const codegenPath = resolve(process.cwd(), config.codegenScript);
log.info(`Loading code generation script: ${codegenPath}`);
const { default: codegenFn } = await import(codegenPath);
if (typeof codegenFn === 'function') {
const generatedCode = codegenFn(ast);
log.success('Code generated.');
console.log(`${colors.bold}GENERATED CODE:${colors.reset}\n${generatedCode}`);
} else {
log.warn(`Code generation script '${config.codegenScript}' does not export a default function.`);
}
} catch (codegenErr) {
log.error(`Failed to generate code: ${codegenErr instanceof Error ? codegenErr.message : String(codegenErr)}`);
process.exit(1);
}
}
} else {
log.error('Parse failed');
console.error(formatError(result));
process.exit(1);
}
} else {
log.info('No test input provided. Grammar compiled successfully.');
if (config.verbose) {
log.info('Use --test <input> to test parsing or --interactive for interactive mode.');
}
}
} catch (err: unknown) {
log.error('An error occurred');
if (config.verbose) {
console.error(err);
} else {
console.error(formatCompilationError(err, ''));
}
process.exit(1);
}
}
// Handle uncaught exceptions gracefully
process.on('uncaughtException', (err) => {
log.error(`Uncaught exception: ${err.message}`);
process.exit(1);
});
process.on('unhandledRejection', (reason) => {
log.error(`Unhandled rejection: ${reason}`);
process.exit(1);
});
main();