langium-cli
Version:
CLI for Langium - the language engineering tool
206 lines (189 loc) • 7.6 kB
text/typescript
/******************************************************************************
* Copyright 2021-2022 TypeFox GmbH
* This program and the accompanying materials are made available under the
* terms of the MIT License, which is available in the project root.
******************************************************************************/
import type { Grammar } from 'langium';
import { GrammarAST, GrammarUtils, RegExpUtils, stream } from 'langium';
import type { LangiumLanguageConfig } from '../../package-types.js';
import { collectKeywords } from '../langium-util.js';
/* eslint-disable dot-notation */
/**
 * Root object of the generated TextMate grammar.
 * `generateTextMate` builds one instance and serializes it to JSON.
 */
interface TextMateGrammar {
/** Named patterns; produced by `getRepository`. */
repository: Repository;
/** Root scope of the language; `generateTextMate` sets it to `source.<config.id>`. */
readonly scopeName: string;
/** Top-level patterns; produced by `getPatterns`. */
readonly patterns: Pattern[];
/** Not populated by `generateTextMate`. */
readonly injections?: { [expression: string]: Pattern };
/** Not populated by `generateTextMate`. */
readonly injectionSelector?: string;
/** `generateTextMate` copies `config.fileExtensions` here, or an empty array when that is nullish. */
readonly fileTypes?: string[];
/** Grammar name; `generateTextMate` sets it to `config.id`. */
readonly name?: string;
/** Not populated by `generateTextMate`. */
readonly firstLineMatch?: string;
}
/**
 * Dictionary of named patterns.
 * `getRepository` stores entries under the keys `comments` and `string-character-escape`;
 * other patterns refer to them through `include: '#<name>'`.
 */
interface Repository {
[name: string]: Pattern;
}
/**
 * A single rule of the generated grammar. Every field is optional.
 * The generator functions in this file emit three shapes:
 * an `include` reference, a `name` + `match` rule, or a `begin`/`end` rule
 * (optionally with captures and nested `patterns`).
 */
interface Pattern {
// The only non-readonly field; never assigned by the generator functions in this file.
id?: number;
// Reference to a repository entry, written as '#<entry name>'.
readonly include?: string;
// Scope name assigned to the matched text.
readonly name?: string;
readonly contentName?: string;
// Regular expression source for single-match rules.
readonly match?: string;
readonly captures?: Captures;
// Regular expression sources delimiting a begin/end rule, with their capture scopes.
readonly begin?: string;
readonly beginCaptures?: Captures;
readonly end?: string;
readonly endCaptures?: Captures;
readonly while?: string;
readonly whileCaptures?: Captures;
// Nested rules; either the body of a begin/end rule or the content of a repository entry.
readonly patterns?: Pattern[];
readonly repository?: Repository;
readonly applyEndPatternLast?: boolean;
}
/**
 * Maps a capture id to the pattern (in practice a scope `name`) applied to that capture.
 * `getRepository` uses the ids `'0'` and `'1'`.
 */
interface Captures {
[captureId: string]: Pattern;
}
/**
 * Generates the TextMate grammar for a Langium grammar as JSON text.
 *
 * The grammar object carries, in this order, `name`, `scopeName` (`source.<id>`),
 * `fileTypes`, `patterns` and `repository`.
 *
 * @param grammar Langium grammar whose rules drive the patterns and repository.
 * @param config Language configuration; `id` names the grammar and its root scope,
 *               `fileExtensions` (or an empty list when nullish) becomes `fileTypes`.
 * @returns The grammar serialized with two-space indentation and a trailing newline.
 */
export function generateTextMate(grammar: Grammar, config: LangiumLanguageConfig): string {
    const languageId = config.id;
    const fileTypes = config.fileExtensions ?? [];
    const textMateGrammar: TextMateGrammar = {
        name: languageId,
        scopeName: `source.${languageId}`,
        fileTypes,
        patterns: getPatterns(grammar, config),
        repository: getRepository(grammar, config)
    };
    const serialized = JSON.stringify(textMateGrammar, null, 2);
    return `${serialized}\n`;
}
/**
 * Assembles the top-level pattern list of the grammar.
 *
 * The order is fixed: a reference to the `comments` repository entry, then the
 * keyword pattern, then every string pattern.
 *
 * @param grammar Langium grammar to derive keyword and string patterns from.
 * @param config Language configuration forwarded to the pattern builders.
 * @returns A new array of top-level patterns.
 */
function getPatterns(grammar: Grammar, config: LangiumLanguageConfig): Pattern[] {
    // The reference is emitted unconditionally, whether or not a `comments` entry ends up in the repository.
    const commentsReference: Pattern = { include: '#comments' };
    const keywordPattern = getControlKeywords(grammar, config);
    const stringPatterns = getStringPatterns(grammar, config);
    return [commentsReference, keywordPattern, ...stringPatterns];
}
/**
 * Builds the repository (named patterns) of the generated grammar.
 *
 * Only terminal rules are inspected:
 * - a terminal accepted by `GrammarUtils.isCommentTerminal` contributes one comment pattern
 *   per part returned by `RegExpUtils.getTerminalParts`: a block comment when the part has
 *   an `end`, otherwise a line comment that ends at the end of the line;
 * - any other terminal whose name equals `string` (ignoring case) enables the
 *   escape-sequence pattern.
 *
 * @param grammar Langium grammar whose rules are scanned.
 * @param config Language configuration; its `id` is used as the suffix of every scope name.
 * @returns A repository holding `comments` only when at least one comment pattern was
 *          collected, and `string-character-escape` only when a string terminal was found.
 */
function getRepository(grammar: Grammar, config: LangiumLanguageConfig): Repository {
    const comments: Pattern[] = [];
    let hasStringTerminal = false;

    for (const rule of grammar.rules) {
        if (!GrammarAST.isTerminalRule(rule)) {
            continue;
        }
        if (!GrammarUtils.isCommentTerminal(rule)) {
            if (rule.name.toLowerCase() === 'string') {
                hasStringTerminal = true;
            }
            continue;
        }
        for (const { start, end } of RegExpUtils.getTerminalParts(GrammarUtils.terminalRegex(rule))) {
            // Property order matters: it is the key order of the serialized JSON.
            comments.push(end
                ? {
                    name: `comment.block.${config.id}`,
                    begin: start,
                    beginCaptures: {
                        '0': { name: `punctuation.definition.comment.${config.id}` }
                    },
                    end: end,
                    endCaptures: {
                        '0': { name: `punctuation.definition.comment.${config.id}` }
                    }
                }
                : {
                    begin: start,
                    beginCaptures: {
                        '1': { name: `punctuation.whitespace.comment.leading.${config.id}` }
                    },
                    // Zero-width lookahead: the line comment stops right before the end of the line.
                    end: '(?=$)',
                    name: `comment.line.${config.id}`
                });
        }
    }

    const repository: Repository = {};
    if (comments.length > 0) {
        repository['comments'] = { patterns: comments };
    }
    if (hasStringTerminal) {
        repository['string-character-escape'] = {
            name: `constant.character.escape.${config.id}`,
            // Backslash followed by a hex, unicode or octal escape, any single character, or end of line.
            match: '\\\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|u\\{[0-9A-Fa-f]+\\}|[0-2][0-7]{0,2}|3[0-6][0-7]?|37[0-7]?|[4-7][0-7]?|.|$)'
        };
    }
    return repository;
}
/**
 * Creates the pattern that scopes the grammar's keywords as `keyword.control.<id>`.
 *
 * Only keywords containing at least one ASCII letter are considered. They are turned
 * into boundary-delimited groups by `groupKeywords` and joined with `|`. When
 * `pack.caseInsensitive` is truthy the expression is prefixed with `(?i)`.
 *
 * NOTE(review): when no keyword qualifies, `match` is the empty string (or just `(?i)`).
 *
 * @param grammar Langium grammar to collect the keywords from.
 * @param pack Language configuration supplying `id` and `caseInsensitive`.
 * @returns The keyword pattern.
 */
function getControlKeywords(grammar: Grammar, pack: LangiumLanguageConfig): Pattern {
    const containsLetter = /[A-Za-z]/;
    const wordLikeKeywords = collectKeywords(grammar).filter(keyword => containsLetter.test(keyword));
    const flagPrefix = pack.caseInsensitive ? '(?i)' : '';
    const alternation = groupKeywords(wordLikeKeywords).join('|');
    return {
        name: `keyword.control.${pack.id}`,
        match: flagPrefix + alternation
    };
}
/**
 * Turns keywords into regular-expression alternation groups, each wrapped in
 * boundary assertions that fit the first and last character of its members.
 *
 * A keyword edge that is a word character (`\w`) is guarded by `\b`, any other edge by `\B`.
 * Groups are emitted in a fixed order (word/word, special/word, word/special,
 * special/special) and empty groups are omitted. Keywords are escaped with
 * `RegExpUtils.escapeRegExp` and keep their input order inside a group.
 *
 * @param keywords Keywords to group.
 * @returns One expression of the form `<boundary>(kw1|kw2|...)<boundary>` per non-empty group.
 */
function groupKeywords(keywords: string[]): string[] {
    const wordChar = /\w/;
    const wordToWord: string[] = [];
    const specialToWord: string[] = [];
    const wordToSpecial: string[] = [];
    const specialToSpecial: string[] = [];

    for (const keyword of keywords) {
        const escaped = RegExpUtils.escapeRegExp(keyword);
        const startsWithWordChar = wordChar.test(keyword[0]);
        const endsWithWordChar = wordChar.test(keyword[keyword.length - 1]);
        const bucket = startsWithWordChar
            ? (endsWithWordChar ? wordToWord : wordToSpecial)
            : (endsWithWordChar ? specialToWord : specialToSpecial);
        bucket.push(escaped);
    }

    // [leading assertion, members, trailing assertion] in output order.
    const layout: Array<[string, string[], string]> = [
        ['\\b', wordToWord, '\\b'],
        ['\\B', specialToWord, '\\b'],
        ['\\b', wordToSpecial, '\\B'],
        ['\\B', specialToSpecial, '\\B']
    ];
    return layout
        .filter(([, members]) => members.length > 0)
        .map(([leading, members, trailing]) => `${leading}(${members.join('|')})${trailing}`);
}
/**
 * Creates the begin/end patterns for string literals.
 *
 * Uses the first terminal rule whose name equals `string` (ignoring case). Each part
 * of its regular expression that has an `end` becomes one `string.quoted.*` pattern
 * that includes the `string-character-escape` repository entry; parts without an
 * `end` are skipped.
 *
 * @param grammar Langium grammar to search for the string terminal.
 * @param pack Language configuration; its `id` is the suffix of the scope name.
 * @returns The string patterns, or an empty array when there is no string terminal.
 */
function getStringPatterns(grammar: Grammar, pack: LangiumLanguageConfig): Pattern[] {
    const stringRule = stream(grammar.rules)
        .filter(GrammarAST.isTerminalRule)
        .find(rule => rule.name.toLowerCase() === 'string');
    if (!stringRule) {
        return [];
    }
    return RegExpUtils.getTerminalParts(GrammarUtils.terminalRegex(stringRule))
        .filter(part => part.end)
        .map(part => ({
            name: `string.quoted.${delimiterName(part.start)}.${pack.id}`,
            begin: part.start,
            end: part.end,
            patterns: [{ include: '#string-character-escape' }]
        }));
}
/**
 * Names a string delimiter for use inside a `string.quoted.<name>.<id>` scope.
 *
 * @param delimiter The opening delimiter expression of a string pattern.
 * @returns `single` for `'`, `double` for `"`, `backtick` for a backtick,
 *          and `delimiter` for anything else.
 */
function delimiterName(delimiter: string): string {
    switch (delimiter) {
        case "'":
            return 'single';
        case '"':
            return 'double';
        case '`':
            return 'backtick';
        default:
            return 'delimiter';
    }
}