@finos/legend-code-editor
Version:
Legend shared advanced application components and building blocks
373 lines • 14.9 kB
JavaScript
/**
* Copyright (c) 2020-present, Goldman Sachs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* eslint-disable prefer-named-capture-group */
import { PURE_ELEMENT_NAME, PURE_CONNECTION_NAME } from '@finos/legend-graph';
import { languages as monacoLanguagesAPI } from 'monaco-editor';
import { PURE_GRAMMAR_TOKEN } from './PureLanguage.js';
import { CODE_EDITOR_LANGUAGE } from './CodeEditorUtils.js';
/**
 * Suffix appended to every token type produced by the tokenizer,
 * e.g. `identifier.pure`, `number.pure`, etc.
 */
const PURE_GRAMMAR_TOKEN_POSTFIX = '.pure';

// Language configuration (word pattern, comments, bracket handling, folding markers).
// Taken from the `monaco-languages` configuration for Java in order to do proper brace matching
// See https://github.com/microsoft/monaco-languages/blob/master/src/java/java.ts
const configuration = (() => {
  // Turns an `[open, close]` tuple into the `{ open, close }` shape
  // used by the auto-closing and surrounding pair lists.
  const toPair = ([open, close]) => ({ open, close });

  const bracketTuples = [
    ['{', '}'],
    ['[', ']'],
    ['(', ')'],
  ];
  const quoteTuples = [
    ['"', '"'],
    ["'", "'"],
  ];
  // Angle brackets are only offered for surrounding a selection, not for auto-closing.
  const angleTuples = [
    ['<', '>'],
    ['<<', '>>'],
  ];

  return {
    // NOTE: Pure identifier includes $ but not in the first position (as that is parsed as a variable)
    wordPattern: /(-?\d*\.\d\w*)|([^`~!@#%^$&*()\-=+[{\]}\\|;:'",.<>/?\s][^`~!@#%^&*()\-=+[{\]}\\|;:'",.<>/?\s]*)/,
    comments: {
      lineComment: '//',
      blockComment: ['/*', '*/'],
    },
    brackets: bracketTuples.map(([open, close]) => [open, close]),
    autoClosingPairs: [...bracketTuples, ...quoteTuples].map(toPair),
    surroundingPairs: [...bracketTuples, ...quoteTuples, ...angleTuples].map(
      toPair,
    ),
    folding: {
      markers: {
        // `// #region` / `// region` / `// <editor-fold` opens a foldable section
        start: /^\s*\/\/\s*(?:(?:#?region\b)|(?:<editor-fold\b))/,
        // `// #endregion` / `// endregion` / `// </editor-fold>` closes it
        end: /^\s*\/\/\s*(?:(?:#?endregion\b)|(?:<\/editor-fold>))/,
      },
    },
  };
})();
/**
 * Create a new Monarch definition to support syntax-highlighting for Pure
 * See https://microsoft.github.io/monaco-editor/monarch.html
 *
 * The way the SQL Monarch definition is organized is good and worth learning from
 * See https://github.com/microsoft/monaco-languages/blob/master/src/sql/sql.ts
 *
 * NOTE: using `monarch` only allows fairly basic syntax-highlighting;
 * to actually do full AST analysis, we might need something more serious like
 * using TextMate grammar which is used by VSCode itself
 * See https://github.com/microsoft/monaco-editor#faq
 * See https://code.visualstudio.com/api/language-extensions/syntax-highlight-guide
 *
 * @param {string[]} extraKeywords additional keywords, spread at the front of the built-in keyword list
 * @returns {object} the Monarch language definition (token settings, named regular expressions and tokenizer states)
 */
const generateLanguageMonarch = (extraKeywords) =>
  // TODO: complete syntax-highlighter for core features like constraint, derived properties, etc.
  // TODO: add syntax highlighting for modules/plugins (come up with a plugin mechanism to do this).
  ({
    // anything not matched by a tokenizer rule is flagged as invalid
    defaultToken: 'invalid',
    tokenPostfix: PURE_GRAMMAR_TOKEN_POSTFIX,
    keywords: [
      ...extraKeywords,
      // relational
      'Schema',
      'Table',
      'Join',
      'View',
      'primaryKey',
      'groupBy',
      'mainTable',
      // native
      'let',
      'extends',
      'true',
      'false',
      'projects',
      // elements
      PURE_ELEMENT_NAME.CLASS,
      PURE_ELEMENT_NAME.ASSOCIATION,
      PURE_ELEMENT_NAME.ENUMERATION,
      PURE_ELEMENT_NAME.MEASURE,
      PURE_ELEMENT_NAME.PROFILE,
      PURE_ELEMENT_NAME.FUNCTION,
      PURE_ELEMENT_NAME.MAPPING,
      PURE_ELEMENT_NAME.RUNTIME,
      PURE_ELEMENT_NAME.CONNECTION,
      PURE_ELEMENT_NAME.FILE_GENERATION,
      PURE_ELEMENT_NAME.GENERATION_SPECIFICATION,
      PURE_ELEMENT_NAME.DATA_ELEMENT,
      // connections
      PURE_CONNECTION_NAME.JSON_MODEL_CONNECTION,
      PURE_CONNECTION_NAME.MODEL_CHAIN_CONNECTION,
      PURE_CONNECTION_NAME.XML_MODEL_CONNECTION,
      // mapping
      'include',
      'EnumerationMapping',
      'Pure',
      'AssociationMapping',
      'XStore',
      'AggregationAware',
      /**
       * @modularize
       * See https://github.com/finos/legend-studio/issues/65
       */
      PURE_ELEMENT_NAME.SERVICE,
      PURE_ELEMENT_NAME.FLAT_DATA,
      PURE_ELEMENT_NAME.DATABASE,
      PURE_CONNECTION_NAME.FLAT_DATA_CONNECTION,
      PURE_CONNECTION_NAME.RELATIONAL_DATABASE_CONNECTION,
      'Relational',
    ],
    operators: [
      '=',
      '>',
      '<',
      '!',
      '~',
      '?',
      ':',
      '==',
      '<=',
      '>=',
      '&&',
      '||',
      '++',
      '--',
      '+',
      '-',
      '*',
      '/',
      '&',
      '|',
      '^',
      '%',
      '->',
      '#{',
      '}#',
      '@',
      '<<',
      '>>',
    ],
    languageStructs: ['import', 'native'],
    // common regular expressions to be used in tokenizer (referenced as `@name` inside rules)
    identifier: /[a-zA-Z_$][\w$]*/,
    symbols: /[=><!~?:&|+\-*/^%#@]+/,
    escapes: /\\(?:[abfnrtv\\"']|x[0-9A-Fa-f]{1,4}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
    digits: /\d+(_+\d+)*/,
    octaldigits: /[0-7]+(_+[0-7]+)*/,
    binarydigits: /[0-1]+(_+[0-1]+)*/,
    // only hexadecimal characters, optionally grouped with underscores
    hexdigits: /[0-9a-fA-F]+(_+[0-9a-fA-F]+)*/,
    multiplicity: /\[(?:[a-zA-Z0-9]+(?:\.\.(?:[a-zA-Z0-9]+|\*|))?|\*)\]/,
    package: /(?:[\w_]+::)+/,
    // NOTE: generics is a little tricky because in order to do it right, we have to
    // do some sort of bracket matching, but we can only use a simple tokenizer here
    // so to account for cases like `<Nil,Any|*>)->` `Function<{T[1]->Boolean[1]}>[1]`
    // we have to make sure the content does not contain any `:` or `.` characters
    // in order to avoid the accidental greedy match with inputs like
    // `function doSomething<T>(a: Function<T[1]->Boolean[1]>)`
    // we also make sure the last character of the content is not `-` to avoid
    // accidentally matching `->` as the end of the generics
    generics: /(?:(?:<\w+>)|(?:<[^:.@^()]+[^-]>))/,
    // `\.` is a literal dot introducing the fractional seconds
    date: /%-?\d+(?:-\d+(?:-\d+(?:T(?:\d+(?::\d+(?::\d+(?:\.\d+)?)?)?)(?:[+-][0-9]{4})?)))/,
    time: /%\d+(?::\d+(?::\d+(?:\.\d+)?)?)?/,
    tokenizer: {
      root: [
        // NOTE: since `monaco-editor` Monarch is only meant for tokenizing
        // and the need to highlight Pure syntax is more than just token-based,
        // but semantic/syntax-based we have to create these complex rules.
        // the things to note here is these are not meant to match multilines
        // and they must be placed before identifier rules since token matching
        // is run in order
        // See https://github.com/microsoft/monaco-editor/issues/316#issuecomment-273555698
        // See https://github.com/microsoft/monaco-editor/issues/571#issuecomment-342555050
        // See https://microsoft.github.io/monaco-editor/monarch.html
        { include: '@pure' },
        { include: '@date' },
        { include: '@color' },
        // parser markers
        [
          // NOTE: any leading whitespace to the section header is considered invalid syntax
          /^\s*###[\w]+/,
          PURE_GRAMMAR_TOKEN.PARSER,
        ],
        // identifiers and keywords
        [
          /(@identifier)/,
          {
            cases: {
              '@languageStructs': PURE_GRAMMAR_TOKEN.LANGUAGE_STRUCT,
              '@keywords': `${PURE_GRAMMAR_TOKEN.KEYWORD}.$0`,
              // function descriptor
              '([a-zA-Z_$][\\w$]*)_((\\w+_(([a-zA-Z0-9]+)|(\\$[a-zA-Z0-9]+_[a-zA-Z0-9]+\\$)))__)*(\\w+_(([a-zA-Z0-9]+)|(\\$[a-zA-Z0-9]+_[a-zA-Z0-9]+\\$)))_': PURE_GRAMMAR_TOKEN.TYPE,
              '@default': PURE_GRAMMAR_TOKEN.IDENTIFIER,
            },
          },
        ],
        // whitespace
        { include: '@whitespace' },
        // delimiters and operators
        [/[{}()[\]]/, '@brackets'],
        [/[<>](?!@symbols)/, '@brackets'],
        [
          /@symbols/,
          {
            cases: {
              '@operators': PURE_GRAMMAR_TOKEN.OPERATOR,
              '@default': PURE_GRAMMAR_TOKEN.IDENTIFIER,
            },
          },
        ],
        { include: '@number' },
        // delimiter: after number because of .\d floats
        [/[;,.]/, PURE_GRAMMAR_TOKEN.DELIMITER],
        // strings
        // NOTE: including non-terminated strings so that as people type ', we can start showing them that they're working on a string
        [/'([^'\\]|\\.)*$/, `${PURE_GRAMMAR_TOKEN.STRING}.invalid`],
        [/'/, PURE_GRAMMAR_TOKEN.STRING, '@string'],
        { include: '@characters' },
      ],
      pure: [
        // type
        [/(@package\*)/, [PURE_GRAMMAR_TOKEN.PACKAGE]], // import path
        [
          /(@package?)(@identifier)(@generics?)(\s*)(@multiplicity)/,
          [
            PURE_GRAMMAR_TOKEN.PACKAGE,
            PURE_GRAMMAR_TOKEN.TYPE,
            PURE_GRAMMAR_TOKEN.GENERICS,
            PURE_GRAMMAR_TOKEN.WHITESPACE,
            PURE_GRAMMAR_TOKEN.MULTIPLICITY,
          ],
        ],
        [
          /(@package)(@identifier)(@generics?)/,
          [
            PURE_GRAMMAR_TOKEN.PACKAGE,
            PURE_GRAMMAR_TOKEN.TYPE,
            PURE_GRAMMAR_TOKEN.GENERICS,
          ],
        ],
        // special operators that uses type (e.g. constructor, cast)
        [
          /([@^])(\s*)(@package?)(@identifier)(@generics?)(@multiplicity?)/,
          [
            `${PURE_GRAMMAR_TOKEN.TYPE}.operator`,
            PURE_GRAMMAR_TOKEN.WHITESPACE,
            PURE_GRAMMAR_TOKEN.PACKAGE,
            PURE_GRAMMAR_TOKEN.TYPE,
            PURE_GRAMMAR_TOKEN.GENERICS,
            PURE_GRAMMAR_TOKEN.MULTIPLICITY,
          ],
        ],
        // property / parameter
        [
          /(\.\s*)(@identifier)/,
          [PURE_GRAMMAR_TOKEN.DELIMITER, PURE_GRAMMAR_TOKEN.PROPERTY],
        ],
        [
          /(@identifier)(\s*=)/,
          [PURE_GRAMMAR_TOKEN.PROPERTY, PURE_GRAMMAR_TOKEN.OPERATOR],
        ],
        [
          /(@identifier)(\.)(@identifier)/,
          [
            PURE_GRAMMAR_TOKEN.TYPE,
            PURE_GRAMMAR_TOKEN.OPERATOR,
            PURE_GRAMMAR_TOKEN.PROPERTY,
          ],
        ], // could be: property chain, profile tag, and stereotype
        [
          /(@identifier)(\s*:)/,
          [PURE_GRAMMAR_TOKEN.PARAMETER, PURE_GRAMMAR_TOKEN.OPERATOR],
        ],
        // variables
        [
          /(let)(\s+)(@identifier)(\s*=)/,
          [
            PURE_GRAMMAR_TOKEN.KEYWORD,
            PURE_GRAMMAR_TOKEN.WHITESPACE,
            PURE_GRAMMAR_TOKEN.VARIABLE,
            PURE_GRAMMAR_TOKEN.OPERATOR,
          ],
        ],
        [/(\$@identifier)/, [`${PURE_GRAMMAR_TOKEN.VARIABLE}.reference`]],
      ],
      date: [
        [/(%latest)/, [`${PURE_GRAMMAR_TOKEN.DATE}.latest`]],
        [/(@date)/, [PURE_GRAMMAR_TOKEN.DATE]],
        [/(@time)/, [`${PURE_GRAMMAR_TOKEN.DATE}.time`]],
      ],
      color: [[/(#[0-9a-fA-F]{6})/, [PURE_GRAMMAR_TOKEN.COLOR]]],
      number: [
        [
          /(@digits)[eE]([-+]?(@digits))?[fFdD]?/,
          `${PURE_GRAMMAR_TOKEN.NUMBER}.float`,
        ],
        [
          /(@digits)\.(@digits)([eE][-+]?(@digits))?[fFdD]?/,
          `${PURE_GRAMMAR_TOKEN.NUMBER}.float`,
        ],
        [/0[xX](@hexdigits)[Ll]?/, `${PURE_GRAMMAR_TOKEN.NUMBER}.hex`],
        [/0(@octaldigits)[Ll]?/, `${PURE_GRAMMAR_TOKEN.NUMBER}.octal`],
        [/0[bB](@binarydigits)[Ll]?/, `${PURE_GRAMMAR_TOKEN.NUMBER}.binary`],
        [/(@digits)[fFdD]/, `${PURE_GRAMMAR_TOKEN.NUMBER}.float`],
        [/(@digits)[lL]?/, PURE_GRAMMAR_TOKEN.NUMBER],
      ],
      whitespace: [
        [/[ \t\r\n]+/, PURE_GRAMMAR_TOKEN.WHITESPACE],
        [/\/\*\*(?!\/)/, `${PURE_GRAMMAR_TOKEN.COMMENT}.doc`, '@doc'],
        [/\/\*/, PURE_GRAMMAR_TOKEN.COMMENT, '@comment'],
        [/\/\/.*$/, PURE_GRAMMAR_TOKEN.COMMENT],
      ],
      comment: [
        [/[^/*]+/, PURE_GRAMMAR_TOKEN.COMMENT],
        // [/\/\*/, PURE_GRAMMAR_TOKEN.COMMENT, '@push' ],    // nested comment not allowed :-(
        // [/\/\*/, `${PURE_GRAMMAR_TOKEN.COMMENT}.invalid` ],    // this breaks block comments in the shape of /* //*/
        [/\*\//, PURE_GRAMMAR_TOKEN.COMMENT, '@pop'],
        [/[/*]/, PURE_GRAMMAR_TOKEN.COMMENT],
      ],
      // Same shape as `comment` above with `.doc` appended to the token,
      // except that a nested `/*` is flagged as invalid here
      doc: [
        [/[^/*]+/, `${PURE_GRAMMAR_TOKEN.COMMENT}.doc`],
        // [/\/\*/, `${PURE_GRAMMAR_TOKEN.COMMENT}.doc`, '@push' ],    // nested comment not allowed :-(
        [/\/\*/, `${PURE_GRAMMAR_TOKEN.COMMENT}.doc.invalid`],
        [/\*\//, `${PURE_GRAMMAR_TOKEN.COMMENT}.doc`, '@pop'],
        [/[/*]/, `${PURE_GRAMMAR_TOKEN.COMMENT}.doc`],
      ],
      string: [
        [/[^\\']+/, PURE_GRAMMAR_TOKEN.STRING],
        [/@escapes/, `${PURE_GRAMMAR_TOKEN.STRING}.escape`],
        [/\\./, `${PURE_GRAMMAR_TOKEN.STRING}.escape.invalid`],
        [/'/, PURE_GRAMMAR_TOKEN.STRING, '@pop'],
      ],
      characters: [
        [/'[^\\']'/, PURE_GRAMMAR_TOKEN.STRING],
        [
          /(')(@escapes)(')/,
          [
            PURE_GRAMMAR_TOKEN.STRING,
            `${PURE_GRAMMAR_TOKEN.STRING}.escape`,
            PURE_GRAMMAR_TOKEN.STRING,
          ],
        ],
        [/'/, `${PURE_GRAMMAR_TOKEN.STRING}.invalid`],
      ],
    },
  });
/**
 * Registers the Pure language with Monaco, then installs its language
 * configuration and its Monarch tokens provider.
 *
 * @param {object} [options] optional settings
 * @param {string[]} [options.extraKeywords] additional keywords handed to the Monarch definition; defaults to an empty list
 */
export function setupPureLanguageService(options) {
  const languageId = CODE_EDITOR_LANGUAGE.PURE;
  const extraKeywords = options?.extraKeywords ?? [];
  const monarchDefinition = generateLanguageMonarch(extraKeywords);

  monacoLanguagesAPI.register({ id: languageId });
  monacoLanguagesAPI.setLanguageConfiguration(languageId, configuration);
  monacoLanguagesAPI.setMonarchTokensProvider(languageId, monarchDefinition);
}
//# sourceMappingURL=PureLanguageService.js.map