langium-cli
Version:
CLI for Langium - the language engineering tool
217 lines • 9.18 kB
JavaScript
/******************************************************************************
* Copyright 2025 TypeFox GmbH
* This program and the accompanying materials are made available under the
* terms of the MIT License, which is available in the project root.
******************************************************************************/
import { CstUtils, GrammarAST as ast } from 'langium';
import { EOL } from 'langium/generate';
import _ from 'lodash';
export function generateBnf(grammars, options = { dialect: 'GBNF' }) {
const grammarsWithName = grammars.filter(grammar => !!grammar.name);
const isHiddenTerminalRule = (rule) => {
return ast.isTerminalRule(rule) && rule.hidden;
};
const ctx = {
rootAssigned: options.dialect === 'EBNF',
hasHiddenRules: grammarsWithName.some(grammar => grammar.rules.some(isHiddenTerminalRule)),
dialect: options.dialect,
commentStyle: options.commentStyle ?? (options.dialect === 'GBNF' ? 'hash' : 'parentheses')
};
const hiddenRules = [];
let result = '';
grammarsWithName.forEach(grammar => {
grammar.rules.forEach(rule => {
result += processRule(rule, ctx);
result += EOL + EOL;
if (isHiddenTerminalRule(rule)) {
hiddenRules.push(rule);
}
});
});
if (hiddenRules.length > 0) {
result += `${processName('HIDDEN', ctx)} ::= ( ${hiddenRules.map(rule => processName(rule.name, ctx)).join(' | ')} )${EOL}`;
}
return result;
}
function processRule(rule, ctx) {
const markRoot = !ctx.rootAssigned && ast.isParserRule(rule) && rule.entry;
if (markRoot) {
ctx.rootAssigned = true;
}
// GBNF expects 'root' as the root rule name, Lark e.g. expects 'start'.
const ruleComment = processComment(rule, ctx);
const hiddenPrefix = (ast.isTerminalRule(rule) && !rule.hidden) ? hiddenRuleCall(ctx) : '';
const ruleName = markRoot ? 'root' : rule.name;
if (ast.isParserRule(rule) && rule.parameters.length > 0) {
// parser rule with parameters
const variations = parserRuleVariations(rule.parameters);
let content = '';
variations.forEach((variation, idx) => {
const variationCtx = { ...ctx, parserRuleVariation: variation };
content += `${ruleComment}${processName(ruleName, variationCtx, variation)} ::= ${hiddenPrefix}${processElement(rule.definition, variationCtx)}`;
if (idx < variations.length - 1) {
content += EOL;
}
});
return content;
}
if (ast.isInfixRule(rule)) {
return `${ruleComment}${processName(ruleName, ctx)} ::= ${hiddenPrefix}${processInfix(rule, ctx)}`;
}
else {
return `${ruleComment}${processName(ruleName, ctx)} ::= ${hiddenPrefix}${processElement(rule.definition, ctx)}`;
}
}
function processInfix(rule, ctx) {
const infixRuleName = processName(rule.name, ctx);
const variation = collectArguments(rule.call.rule.ref, rule.call.arguments, ctx);
const ruleName = rule.call.rule.ref?.name ?? rule.call.rule.$refText;
const call = processName(ruleName, ctx, variation);
const operators = rule.operators.precedences.flatMap(prec => prec.operators.map(op => `"${op.value}"`));
const allOperators = `(${operators.join(' | ')})`;
return `${infixRuleName} ::= ${call} (${allOperators} ${call})*`;
}
function processElement(element, ctx) {
const processRecursively = (element) => {
return processElement(element, ctx);
};
if (ast.isKeyword(element)) {
return `${hiddenRuleCall(ctx)}"${element.value}"`;
}
else if (ast.isGroup(element) || ast.isTerminalGroup(element)) {
if (ast.isGroup(element) && element.guardCondition && !evaluateCondition(element.guardCondition, ctx)) {
// Skip group if guard condition is false
return ' ';
}
const content = element.elements.map(processRecursively).filter(notEmpty).join(' ');
if (element.cardinality && notEmpty(content)) {
return `( ${content} )${processCardinality(element)}`;
}
return content;
}
else if (ast.isAssignment(element)) {
return processRecursively(element.terminal) + processCardinality(element);
}
else if (ast.isRuleCall(element) || ast.isTerminalRuleCall(element)) {
const variation = ast.isRuleCall(element) ? collectArguments(element.rule.ref, element.arguments, ctx) : undefined;
const ruleName = element.rule.ref?.name ?? element.rule.$refText;
return processName(ruleName, ctx, variation) + processCardinality(element);
}
else if (ast.isAlternatives(element) || ast.isTerminalAlternatives(element)) {
const content = element.elements.map(processRecursively).filter(notEmpty).join(' | ');
if (notEmpty(content)) {
return '(' + content + ')' + processCardinality(element);
}
return '';
}
else if (ast.isRegexToken(element)) {
// First remove trailing and leading slashes. Replace escaped slashes `\/` with unescaped slashes `/`.
return element.regex.replace(/(^|[^\\])\//g, (_, p1) => p1 + '').replace(/\\\//g, '/');
}
else if (ast.isCrossReference(element)) {
return (element.terminal ? processRecursively(element.terminal) : 'ID') + processCardinality(element);
}
else if (ast.isAction(element)) {
return '';
}
console.error(`Not handled AbstractElement type: ${element?.$type}`);
return `not-handled-(${element?.$type})`;
}
function processCardinality(element) {
return element.cardinality ?? '';
}
function processName(ruleName, ctx, parserRuleVariation) {
const name = parserRuleVariation
? `${ruleName}${Object.entries(parserRuleVariation)
.filter(entry => entry[1]).map(entry => entry[0].charAt(0).toUpperCase() + entry[0].slice(1))
.join('')}`
: ruleName;
switch (ctx.dialect) {
case 'GBNF':
// convert camel case to Kebab Case for GBNF (GGML AI)
return _.kebabCase(name);
case 'EBNF':
return `<${name}>`;
default:
return name;
}
}
function processComment(rule, ctx) {
const comment = CstUtils.findCommentNode(rule.$cstNode, ['ML_COMMENT'])?.text
?.replace(/\r?\n|\r/g, ' ') // Replace line breaks
?.replace(/^\/\*\s*/, '') // Remove leading `/*`
?.replace(/\s*\*\/$/, ''); // Remove trailing `*/`
if (comment && comment.trim().length > 0) {
switch (ctx.commentStyle) {
case 'skip':
return ' ';
case 'parentheses':
return `(* ${comment} *)${EOL}`;
case 'slash':
return `/* ${comment} */${EOL}`;
case 'hash':
return `# ${comment}${EOL}`;
}
}
return '';
}
/**
* Generates a call to the `HIDDEN` rule with a trailing space, if there are hidden rules in the grammar.
* @param ctx GeneratorContext
* @returns `HIDDEN* ` if there are hidden rules in the grammar.
*/
function hiddenRuleCall(ctx) {
return ctx.hasHiddenRules ? (processName('HIDDEN', ctx) + '* ') : '';
}
function notEmpty(text) {
return text.trim().length > 0;
}
/**
* @param params parserRule parameters
* @returns all possible combination of guards for the parserRule - 2^params.length
*/
function parserRuleVariations(params) {
const variationsCount = Math.pow(2, params.length);
const variations = [];
for (let i = 0; i < variationsCount; i++) {
const variation = {};
params.map((param, index) => {
// eslint-disable-next-line no-bitwise
const isTrue = (i & (1 << index)) !== 0;
return variation[param.name] = isTrue;
});
variations.push(variation);
}
return variations;
}
function collectArguments(rule, namedArgs, ctx) {
if (ast.isParserRule(rule) && namedArgs.length > 0 && rule.parameters.length === namedArgs.length) {
const variation = {};
namedArgs.forEach((arg, idx) => {
variation[rule.parameters[idx].name] = evaluateCondition(arg.value, ctx);
});
return variation;
}
return undefined;
}
function evaluateCondition(condition, ctx) {
switch (condition.$type) {
case 'BooleanLiteral':
return condition.true;
case 'Conjunction':
return evaluateCondition(condition.left, ctx) && evaluateCondition(condition.right, ctx);
case 'Disjunction':
return evaluateCondition(condition.left, ctx) || evaluateCondition(condition.right, ctx);
case 'Negation':
return !evaluateCondition(condition.value, ctx);
case 'ParameterReference': {
if (!ctx.parserRuleVariation) {
return false;
}
return ctx.parserRuleVariation[condition.parameter.ref?.name ?? condition.parameter.$refText];
}
default:
throw new Error(`Unhandled Condition type: ${condition.$type}`);
}
}
//# sourceMappingURL=bnf-generator.js.map