@sap/cds-compiler
Version:
CDS (Core Data Services) compiler and backends
1,446 lines (1,323 loc) • 54.9 kB
JavaScript
// Generic ANTLR parser class with AST-building functions
// To have an AST also in the case of syntax errors, produce it by adding
// sub-nodes to a parent node, not by returning sub-ASTs (the latter is fine
// for secondary attachments).
'use strict';
const antlr4 = require('antlr4');
const { ATNState } = require('antlr4/src/antlr4/atn/ATNState');
const { DEFAULT: CommonTokenFactory } = require('antlr4/src/antlr4/CommonTokenFactory');
const { dictAdd, dictAddArray } = require('../base/dictionaries');
const locUtils = require('../base/location');
const { parseDocComment } = require('./docCommentParser');
const { parseMultiLineStringLiteral } = require('./multiLineStringParser');
const {
specialFunctions,
quotedLiteralPatterns,
} = require('../compiler/builtins');
const { functionsWithoutParentheses } = require('../parsers/identifiers');
const { Location } = require('../base/location');
const { pathName } = require('../compiler/utils');
const { XsnArtifact, XsnName, XsnSource } = require('../compiler/xsn-model');
const { isBetaEnabled } = require('../base/model');
const { weakLocation } = require('../base/location');
const { normalizeNewLine, normalizeNumberString } = require('./textUtils');
const $location = Symbol.for('cds.$location');
/**
 * Emit a message of the given `severity` via the message functions attached
 * to `parser` (`parser.$messageFunctions`, set in antlrParser.js).
 *
 * If `loc` is an ANTLR token, it is converted into a location with
 * `parser.tokenLocation()` first; any other value is passed through as is.
 *
 * @param {object} parser
 * @param {string} severity Key into `parser.$messageFunctions`
 * @param {string} id Message id
 * @param {object} loc Token or location
 * @param {...any} args Further arguments for the message function
 * @returns {any} Whatever the message function returns
 */
function _message( parser, severity, id, loc, ...args ) {
  const emit = parser.$messageFunctions[severity];
  const location = (loc instanceof antlr4.CommonToken)
    ? parser.tokenLocation( loc )
    : loc;
  return emit( id, location, ...args );
}
// Class which is to be used as grammar option with
// grammar <name> options { superclass = genericAntlrParser; }
//
// The individual AST building functions are to be used with
// this.<function>(...)
// in the actions inside the grammar.
//
class GenericAntlrParser extends antlr4.Parser {
constructor( ...args ) {
// ANTLR restriction: we cannot add parameters to the constructor.
super( ...args );
this.buildParseTrees = false;
// Common properties.
// We set them here so that they are available in the prototype.
// This improved performance by 25% for certain scenario tests.
// Probably because there was no need to look up the prototype chain anymore.
// `$adaptExpected…` / `$nextTokens…` are written by noSemicolonHere() and
// excludeExpected() further below.
this.$adaptExpectedToken = null;
this.$adaptExpectedExcludes = [ ];
this.$nextTokensToken = null;
this.$nextTokensContext = null;
this.options = {};
this.genericFunctionsStack = [];
// Initial keyword spec; replaced by prepareGenericKeywords().
this.$genericKeywords = specialFunctions[''][1];
}
}
// TODO: Use actual methods.
// Attach the AST-building functions to the parser prototype so that grammar
// actions can call them as `this.<function>(...)`.
Object.assign(GenericAntlrParser.prototype, {
// Severity-specific wrappers around _message(); `args` are `id, loc, ...`.
message(...args) {
return _message( this, 'message', ...args );
},
error(...args) {
return _message( this, 'error', ...args );
},
warning(...args) {
return _message( this, 'warning', ...args );
},
info(...args) {
return _message( this, 'info', ...args );
},
// Functions registered under their own name (shorthand properties); some are
// imported at the top of the file (e.g. isBetaEnabled, pathName).
isBetaEnabled,
attachLocation,
assignAnnotation,
addAnnotation,
expressionAsAnnotationValue,
checkExtensionDict,
handleDuplicateExtension,
startLocation,
tokenLocation,
isMultiLineToken,
fixMultiLineTokenEndLocation,
valueWithTokenLocation,
previousTokenAtLocation,
combinedLocation,
surroundByParens,
tokensToStringRepresentation,
secureParens,
unaryOpForParens,
leftAssocBinaryOp,
classifyImplicitName,
warnIfColonFollows,
fragileAlias,
identAst,
reportPathNamedManyOrOne,
reportVirtualAsRef,
reportMissingSemicolon,
pushXprToken,
pushOpToken,
argsExpression,
valuePathAst,
fixNewKeywordPlacement,
signedExpression,
numberLiteral,
unsignedIntegerLiteral,
assignAnnotationValue,
quotedLiteral,
pathName,
docComment,
addDef,
addItem,
addExtension,
createSource,
createDict,
createArray,
finalizeDictOrArray,
insertSemicolon,
setMaxCardinality,
setNullability,
reportDuplicateClause,
reportUnexpectedExtension,
reportUnexpectedSpace,
pushIdent,
pushItem,
handleComposition,
associationInSelectItem,
reportExpandInline,
checkTypeFacet,
checkTypeArgs,
csnParseOnly,
markAsSkippedUntilEOF,
noAssignmentInSameLine,
noSemicolonHere,
setLocalToken,
setLocalTokenIfBefore,
setLocalTokenForId,
excludeExpected,
isStraightBefore,
meltKeywordToIdentifier,
prepareGenericKeywords,
reportErrorForGenericKeyword,
parseMultiLineStringLiteral,
XsnArtifact,
XsnName,
});
// Use the following function for language constructs which we (currently)
// are just able to parse, in order to run tests from HANA CDS. As soon as we
// create ASTs for the language construct and put it into a CSN, a
// corresponding check should actually be inside the compiler, because the same
// language construct can come from a CSN as source.
// TODO: this is not completely done this way
// Use the following function for language constructs which we (currently) do
// not really compile, just use to produce a CSN for functions parse.cql() and
// parse.expr().
// This function has a similar interface to our message functions on purpose!
// (tokens ~= location)
/**
 * Report error `msgId` for the range covered by `tokens`, unless no message
 * id is given or the parser option `parseOnly` is set.
 *
 * @param {string} msgId
 * @param {object[]} tokens First and last entry delimit the reported location
 * @param {object} [textArgs]
 */
function csnParseOnly( msgId, tokens, textArgs ) {
  const isSuppressed = !msgId || this.options.parseOnly;
  if (!isSuppressed) {
    const first = tokens[0];
    const last = tokens[tokens.length - 1];
    this.error( msgId, this.tokenLocation( first, last ), textArgs );
  }
}
/**
 * Do not propose a `;` or closing brace `}` at this position.
 *
 * Records the current token together with the excluded token names on the
 * parser and resets the cached "next tokens" state.  If the current token is
 * a `;` and the error handler provides `reportIgnoredWith`, that is called.
 *
 * Attention: May conflict with excludeExpected()!
 *
 * @this {object}
 */
function noSemicolonHere() {
  const errHandler = this._errHandler;
  const current = this.getCurrentToken();
  Object.assign( this, {
    $adaptExpectedToken: current,
    $adaptExpectedExcludes: [ "';'", "'}'" ],
    $nextTokensToken: current,
    $nextTokensContext: null, // match() of WITH does not reset
    $nextTokensState: ATNState.INVALID_STATE_NUMBER,
  } );
  if (current.text !== ';')
    return;
  if (errHandler && errHandler.reportIgnoredWith)
    errHandler.reportIgnoredWith( this, current );
}
/**
 * Exclude the given token names from the "expected tokens" reported at the
 * current token.  Does nothing if `excludes` is falsy.
 *
 * Using this function "during ATN decision making" has no effect.
 * In front of an ATN decision, you might specify dedicated excludes
 * for non-LA1 tokens via a sub-array in excludes[0].
 * TODO: consider $nextTokens…, see commented use in rule `elementProperties`
 *
 * Usage Note:
 * Must be used at all positions where sync() is called in the generated coding.
 * ```antlr4
 *   { this.excludeExpected(['ACTIONS']); }
 *   ( WITH { this.excludeExpected(['ACTIONS']); } )?
 *   annotationAssignment_ll1[ $art ]* { this.excludeExpected(['ACTIONS']); }
 *   ACTIONS
 * ```
 *
 * @param {string|Array} excludes A single token name or an array of them
 */
function excludeExpected( excludes ) {
  if (!excludes)
    return;
  // @ts-ignore
  const current = this.getCurrentToken();
  const list = Array.isArray( excludes ) ? excludes : [ excludes ];
  this.$adaptExpectedToken = current;
  this.$adaptExpectedExcludes = list;
  this.$nextTokensToken = current;
  this.$nextTokensContext = null;
}
/**
 * Change the type of the current token to `this.constructor[tokenName]` if
 * its upper-cased text equals `string`, and
 *  - with `inSameLine`: the previous token is in the same line,
 *  - with `notBefore` (a RegExp): the text of the token after the current one
 *    does not match it.
 */
function setLocalToken( string, tokenName, notBefore, inSameLine ) {
  const current = this.getCurrentToken();
  if (current.text.toUpperCase() !== string)
    return;
  if (inSameLine && this._input.LT(-1).line !== current.line)
    return;
  if (notBefore && notBefore.test( this._input.LT(2).text ))
    return;
  current.type = this.constructor[tokenName];
}
/**
 * Change the type of the current token to `this.constructor[tokenName]` if
 * its upper-cased text equals `string`, and
 *  - with `inSameLine`: the previous token is in the same line,
 *  - with `before` (a RegExp): the text of the token after the current one
 *    matches it.
 */
function setLocalTokenIfBefore( string, tokenName, before, inSameLine ) {
  const current = this.getCurrentToken();
  if (current.text.toUpperCase() !== string)
    return;
  if (inSameLine && this._input.LT(-1).line !== current.line)
    return;
  if (before && !before.test( this._input.LT(2).text ))
    return;
  current.type = this.constructor[tokenName];
}
/**
 * Look up the upper-cased text of the token at lookahead `offset` in
 * `tokenNameMap`.  If a token name is found and the current token is an
 * identifier or consists of letters/underscores only, change the current
 * token's type to `this.constructor[<token name>]`.
 *
 * @returns {boolean} Whether `tokenNameMap` had an entry for the looked-up text
 */
function setLocalTokenForId( offset, tokenNameMap ) {
  const key = this._input.LT( offset ).text.toUpperCase() || '';
  const tokenName = tokenNameMap[key];
  const current = this.getCurrentToken();
  if (!tokenName)
    return false;
  const isIdentLike = current.type === this.constructor.Identifier ||
    /^[a-zA-Z_]+$/.test( current.text );
  if (isIdentLike)
    current.type = this.constructor[tokenName];
  return true;
}
// // Special function for rule `requiredSemi` before return $ctx
// function braceForSemi() {
// if (RBRACE == null)
// RBRACE = this.literalNames.indexOf( "'}'" );
// console.log(RBRACE)
// // we are called before match('}') and this.state = ...
// let atn = this._interp.atn;
// console.log( atn.nextTokens( atn.states[ this.state ], this._ctx ) )
// let next = atn.states[ this.state ].transitions[0].target;
// // if a '}' is not possible in the grammar after the fake-'}', throw error
// if (!atn.nextTokens( next, this._ctx ).contains(RBRACE))
// console.log( atn.nextTokens( next, this._ctx ) )
// // throw new antlr4.error.InputMismatchException(this);
// }
/**
 * Consume all remaining tokens up to EOF and mark each of them via property
 * `$isSkipped`.  If the current token is not already marked and the parser is
 * not in error recovery mode, an "unwanted token" error (expecting `<EOF>`)
 * is reported first and that token is marked as `'offending'`.
 */
function markAsSkippedUntilEOF() {
  const { EOF } = antlr4.Token;
  let token = this.getCurrentToken();
  if (token.type === EOF)
    return;
  const needsReport = !token.$isSkipped && !this._errHandler.inErrorRecoveryMode( this );
  if (needsReport) {
    // If not already done, we should report an error if we do not see EOF.  We cannot
    // use match() here, because these would consume tokens without marking them.
    this._errHandler.reportUnwantedToken( this, [ '<EOF>' ] );
    token.$isSkipped = 'offending';
    this.consume();
    token = this.getCurrentToken();
  }
  for (; token.type !== EOF; token = this.getCurrentToken()) {
    token.$isSkipped = true;
    this.consume();
  }
}
/**
 * Warn if the current token is an `@` which does not start on a later line
 * than the previous token, i.e. an annotation assignment directly follows
 * in the same line.
 */
function noAssignmentInSameLine() {
  const current = this.getCurrentToken();
  if (current.text !== '@')
    return;
  const previous = this._input.LT(-1);
  if (current.line > previous.line)
    return;
  // TODO: use 'syntax-missing-newline'
  this.warning( 'syntax-missing-semicolon', current, { code: ';' },
                // eslint-disable-next-line @stylistic/js/max-len
                'Add a $(CODE) and/or newline before the annotation assignment to indicate that it belongs to the next statement' );
}
// Use after matching ',' to allow ',' in front of the closing paren.  Be sure
// that you know what to do if successful - break/return/... = check the
// generated grammar; inside loops, you can use `break`.  This function is
// still the preferred way to express an optional ',' at the end, because it
// does not influence the error reporting.  It might also allow to match
// reserved keywords, because there is no ANTLR generated decision in front of it.
//
// Returns whether the text of the current token is exactly `closing`.
function isStraightBefore( closing ) {
  const { text } = this.getCurrentToken();
  return text === closing;
}
/**
 * Turn the current token into an `Identifier` token if its token type is
 * below `Identifier` and its text consists of ASCII letters only.
 *
 * @param {boolean} [exceptTrueFalseNull] If set, leave `true`, `false` and
 *   `null` (case-insensitive) untouched
 */
function meltKeywordToIdentifier( exceptTrueFalseNull = false ) {
  const { Identifier } = this.constructor;
  const token = this.getCurrentToken() || { type: Identifier };
  if (!(token.type < Identifier))
    return;
  if (!/^[a-z]+$/i.test( token.text ))
    return;
  if (exceptTrueFalseNull && /^(true|false|null)$/i.test( token.text ))
    return;
  token.type = Identifier;
}
// Maps the classification of a generic keyword (value in the keyword spec) to
// the name of the token type on the parser class.
const genericTokenTypes = {
  expr: 'GenericExpr',
  separator: 'GenericSeparator',
  intro: 'GenericIntro',
};
/**
 * Select the keyword spec for the current argument of the function in
 * `pathItem`, store it in `this.$genericKeywords` and, if the current token
 * is classified by that spec, change its token type accordingly.
 *
 * The spec is taken from `specialFunctions[<upper-cased function name>]` at
 * the index given by the number of arguments in `pathItem.args`, with a
 * fallback to the entries of `specialFunctions['']`.
 *
 * @memberOf GenericAntlrParser
 *
 * @param pathItem
 * @param [expected] If set ('separator' or 'expr'), the token type is only
 *   changed when the token has exactly this classification
 */
function prepareGenericKeywords( pathItem, expected = null ) {
  const argPos = pathItem?.args?.length || 0;
  const func = pathItem?.id && specialFunctions[pathItem.id.toUpperCase()];
  const spec = func && func[argPos] || specialFunctions[''][argPos ? 1 : 0];
  this.$genericKeywords = spec;
  // @ts-ignore
  const token = this.getCurrentToken() || { text: '' };
  const text = token.text.toUpperCase();
  let generic = spec[text];
  if (expected) { // 'separator' or 'expr' (after 'separator')
    if (generic !== expected)
      return;
  }
  else {
    // Mismatch at beginning (or just an expression): keep token type
    // (if not expression, issue error and consider the token to be an
    // expression replacement, like ALL)
    if (!generic || generic === 'separator')
      return;
    if (generic === 'expr' && spec.intro && spec.intro.includes( text )) {
      // token is both an intro and an expression, like LEADING for TRIM
      const next = this._input.LT(2).text;
      // It stays an expression only if followed by ',', ')' or a separator;
      // followed by EOF -> consider it to be 'intro', better for CC
      const staysExpr = next &&
        (next === ',' || next === ')' || spec[next.toUpperCase()] === 'separator');
      if (!staysExpr)
        generic = 'intro';
    }
  }
  // @ts-ignore
  token.type = this.constructor[genericTokenTypes[generic]];
}
// To be called before having matched ( HideAlternatives | … ).
// Reports the current token as unwanted via the error handler.
function reportErrorForGenericKeyword() {
  const errHandler = this._errHandler;
  errHandler.reportUnwantedToken( this );
  // errHandler.reportInputMismatch( this, { offending: this._input.LT(1) }, null );
}
// Attach location matched by current rule to node `art`.  If a location is
// already provided, only set the end location.  Use this function only
// in @after actions of parser rules, as the end position is only available
// there.
//
// Nodes which are falsy or have a `$parens` property are returned unchanged.
function attachLocation( art ) {
  if (!art || art.$parens)
    return art;
  const { start, stop } = this._ctx;
  if (!art.location) {
    art.location = this.tokenLocation( start, stop );
  }
  else if (stop && this.isMultiLineToken( stop )) {
    // The last token may be a multi-line string literal, in which
    // case we can't rely on `stop.line`.
    this.fixMultiLineTokenEndLocation( stop, art.location );
  }
  else if (stop) {
    const { location } = art;
    location.endLine = stop.line;
    // after the last char (special for EOF?)
    location.endCol = stop.stop - stop.start + stop.column + 2;
  }
  return art;
}
/**
 * Assign annotation `anno` to artifact `art`.
 *
 * The property name is `@` + the absolute annotation name, which is built
 * from `prefix`, the annotation path and an optional `#variant`.  With a
 * prefix, the name `$value` stands for the prefix itself (without the final
 * dot).  If `anno.$flatten` is set, its entries are assigned recursively with
 * the absolute name plus `.` as prefix instead.
 *
 * For top-level calls (empty `prefix`), the annotation is additionally
 * recorded in the deprecated array `art.$annotations`.
 */
function assignAnnotation( art, anno, prefix = '' ) {
  const { name, $flatten } = anno;
  const { path } = name;
  const lastItem = path[path.length - 1];
  if (path.broken || !lastItem.id)
    return;
  const pathname = pathName( path );
  let absolute;
  if (name.variant) {
    // We do not care anymore whether we get a second '#' with flattening.  This
    // can be produced via CSN and with delimited ids anyway.  If backends care,
    // they need to have their own check.
    absolute = `${ prefix }${ pathname }#${ pathName( name.variant.path ) }`;
  }
  else if (prefix && pathname === '$value') {
    absolute = prefix.slice( 0, -1 ); // remove final dot
  }
  else {
    absolute = `${ prefix }${ pathname }`;
  }
  if (!$flatten) {
    name.id = absolute;
    this.addAnnotation( art, `@${ absolute }`, anno );
  }
  else {
    const nestedPrefix = `${ absolute }.`;
    for (const nested of $flatten)
      this.assignAnnotation( art, nested, nestedPrefix );
  }
  if (prefix)
    return;
  // set deprecated $annotations for cds-lsp
  art.$annotations ||= [];
  const location = locUtils.combinedLocation( anno.name, anno );
  art.$annotations.push( { value: anno, location } );
}
/**
 * Set property `prop` of `art` to the annotation assignment `anno`.  If the
 * property is already set, an error is reported at the name of the previous
 * assignment, which is then overwritten.
 */
function addAnnotation( art, prop, anno ) {
  const previous = art[prop];
  art[prop] = anno;
  if (!previous)
    return;
  this.error( 'syntax-duplicate-anno', previous.name.location, { anno: prop },
              'Assignment for $(ANNO) is overwritten by another one below' );
}
// Properties of a duplicate `annotate` which are moved as a whole to the main
// member in checkExtensionDict().
const extensionDicts = {
elements: true, enum: true, params: true, returns: true,
};
/**
 * Handle entries of `dict` which have duplicates (property `$duplicates`):
 *  - for entries whose kind is not 'annotate', call handleDuplicateExtension()
 *    for the entry and each of its duplicates,
 *  - for 'annotate' entries, move annotations, `doc` and the properties listed
 *    in `extensionDicts` from the duplicates to the main entry, reporting
 *    overwritten values.
 *
 * @param {object} dict
 */
function checkExtensionDict( dict ) {
for (const name in dict) {
const def = dict[name];
if (!def.$duplicates)
continue;
if (def.kind !== 'annotate') {
// count how many of the entries with this name are not `extend`s
const numDefines
= def.$duplicates.reduce( addOneForDefinition, addOneForDefinition( 0, def ) );
this.handleDuplicateExtension( def, name, numDefines );
for (const dup of def.$duplicates)
this.handleDuplicateExtension( dup, name, numDefines );
continue;
}
// move annotations, 'doc' and 'elements' etc to main member
for (const dup of def.$duplicates) {
// Object.keys() creates a snapshot, so deleting properties below is safe
for (const prop of Object.keys( dup )) {
if (prop.charAt(0) === '@') {
this.addAnnotation( def, prop, dup[prop] );
delete dup[prop]; // we want to keep $duplicates, but not have duplicate props
}
else if (prop === 'doc') {
// With explicit docComment:false, we don't emit a warning.
if (def.doc && this.options.docComment !== false) {
this.warning( 'syntax-duplicate-doc-comment', def.doc.location, {},
'Doc comment is overwritten by another one below' );
}
def.doc = dup.doc;
delete dup[prop]; // we want to keep $duplicates for LSP, but not have duplicate props
}
else if (extensionDicts[prop]) {
if (def[prop])
this.message( 'syntax-duplicate-annotate', [ def.name.location ], { name, prop } );
def[prop] = dup[prop]; // continuation semantics: last wins
delete dup[prop]; // we want to keep $duplicates for LSP, but not have duplicate props
}
}
if (dup.$annotations) { // update deprecated $annotations for cds-lsp / annotation modeler
if (def.$annotations)
def.$annotations.push( ...dup.$annotations );
else
def.$annotations = dup.$annotations;
}
}
// We keep duplicate statements for LSP, as it needs to traverse all identifiers;
// annotations were removed above to avoid traversing annotations twice.
}
}
/**
 * Reducer: returns `count` incremented by one unless `ext` is an `extend`.
 *
 * @param {number} count
 * @param {object} ext
 * @returns {number}
 */
function addOneForDefinition( count, ext ) {
  if (ext.kind === 'extend')
    return count;
  return count + 1;
}
/**
 * Handle duplicate extensions.  Does not handle `annotate`.
 *
 * For an `extend`, an error is reported at its name; the text variant depends
 * on whether there is at least one definition with the same name.  Otherwise,
 * if there is exactly one definition (i.e. it is not a duplicate definition),
 * the definition is just marked via `$errorReported`.
 *
 * @param {XSN.Extension} ext
 * @param {string} name
 * @param {number} numDefines
 */
function handleDuplicateExtension( ext, name, numDefines ) {
  if (ext.kind !== 'extend') {
    if (numDefines === 1) // a definition, but not duplicate
      ext.$errorReported = 'syntax-duplicate-extend';
    return;
  }
  const variant = numDefines ? 'define' : 'extend';
  this.error( 'syntax-duplicate-extend', [ ext.name.location ], { name, '#': variant } );
}
/**
 * Return the start location of `token`, or of the first token matched by the
 * current rule if `token` is undefined.  The result has no end position.
 *
 * @param {object} [token]
 * @returns {Location}
 */
function startLocation( token = this._ctx.start ) {
  const file = this.filename;
  const { line, column } = token;
  // ANTLR columns are 0-based, location columns are 1-based
  return new Location( file, line, column + 1 );
}
/**
 * Return location of `token`.  If `endToken` is provided, use its end
 * location as end location in the result.
 *
 * The end position is always derived from the last token of the range (i.e.
 * `endToken`, or `token` if there is none); if that token can span multiple
 * lines, the end position is corrected according to the line breaks inside it.
 *
 * @param {object} token
 * @param {object} [endToken]
 * @return {Location} `undefined` if no `token` is provided
 */
function tokenLocation( token, endToken = null ) {
  if (!token)
    return undefined;
  const last = endToken || token; // `endToken` might be null/undefined
  // Default for single line tokens
  const endLine = last.line;
  // after the last char (special for EOF?)
  const endCol = last.stop - last.start + last.column + 2;
  const loc = new Location( this.filename, token.line, token.column + 1, endLine, endCol );
  // This check is done for performance reason.  No need to access a token's
  // data if we know that it spans only one single line.
  // It must be made on the token which determines the end position: a
  // multi-line start token does not influence the end of a later token, and a
  // multi-line end token must be considered even after a single-line start.
  if (this.isMultiLineToken( last ))
    this.fixMultiLineTokenEndLocation( last, loc );
  return loc;
}
/**
 * Return whether `token` has a token type which may span multiple lines:
 * doc comment, string or unterminated literal.
 *
 * @param {object} token
 * @returns {boolean}
 */
function isMultiLineToken( token ) {
  const { type } = token;
  const tokenTypes = this.constructor;
  if (type === tokenTypes.DocComment)
    return true;
  if (type === tokenTypes.String) // TODO: do not check every string content
    return true;
  return type === tokenTypes.UnterminatedLiteral;
}
/**
 * Adapt end location of `location` according to `token`, assuming that `token` is a multi-line
 * token such as a multi-line string or doc comment.
 *
 * Sets `endLine`/`endCol`, respecting newline characters in the token.  The
 * character at `token.stop` itself is not inspected.
 *
 * @param token
 * @param {CSN.Location} location
 */
function fixMultiLineTokenEndLocation( token, location ) {
  const LF = 10; // code point of '\n'
  const { data } = token.source[1];
  const { start, stop } = token;
  // Count the line breaks and remember the position of the last one.
  // Note: We do NOT check for CR, LS, and PS (/[\r\u2028\u2029]/)
  // because ANTLR only uses LF for line break detection.
  let lineBreaks = 0;
  let lastLineBreak = start;
  for (let pos = start; pos < stop; ++pos) {
    if (data[pos] !== LF)
      continue;
    lineBreaks += 1;
    lastLineBreak = pos;
  }
  if (lineBreaks === 0) {
    location.endLine = token.line;
    // after the last char (special for EOF?)
    location.endCol = stop - start + token.column + 2;
    return;
  }
  location.endLine = token.line + lineBreaks;
  location.endCol = stop - lastLineBreak + 1;
}
/**
 * Return `val` with a location; if `val` and `endToken` are not provided, use the
 * lower-cased token string of `startToken` as `val`.  As location, use the
 * location covered by `startToken` and `endToken`, or only `startToken` if no
 * `endToken` is provided.  The `startToken` defaults to the previous token.
 *
 * @param {any} [val]
 * @param {object} [startToken]
 * @param {object} [endToken]
 * @returns {{location: object, val: any}}
 */
function valueWithTokenLocation( val = undefined, startToken = this._input.LT(-1),
                                 endToken = undefined ) {
  const location = this.tokenLocation( startToken, endToken );
  const hasExplicitValue = endToken || val !== undefined;
  if (hasExplicitValue)
    return { location, val };
  return { location, val: startToken.text.toLowerCase() };
}
/**
 * Walk backwards through the already matched tokens and return the token
 * which starts exactly at the start position of `location`.
 *
 * @param {object} location
 * @returns {object|false} The token, or `false` if no token starts there
 */
function previousTokenAtLocation( location ) {
  const startsAtOrAfter = t => t.line > location.line ||
    t.line === location.line && t.column >= location.col;
  let offset = -1;
  let token = this._input.LT( offset );
  while (startsAtOrAfter( token )) {
    offset -= 1;
    token = this._input.LT( offset );
  }
  // token columns are 0-based, location columns are 1-based
  const isMatch = token.line === location.line && token.column + 1 === location.col;
  return isMatch && token;
}
// Create a location with location properties `filename` and `start` from
// argument `start`, and location property `end` from argument `end`.
// If `start` is missing or has no location, the start location of the
// current rule is used instead.
function combinedLocation( start, end ) {
  const from = (start && start.location)
    ? start
    : { location: this.startLocation() };
  return locUtils.combinedLocation( from, end );
}
// make sure that the parens of `IN (…)` do not disappear:
// a parenthesized reference, literal or function (`call`/`cast`) is wrapped
// into an `xpr` node which takes over the `$parens`; queries, other operator
// expressions and nodes without `$parens` are returned unchanged.
function secureParens( expr ) {
  const $parens = expr?.$parens;
  if (!$parens || expr.query)
    return expr;
  const opName = expr.op?.val;
  if (opName && opName !== 'call' && opName !== 'cast')
    return expr;
  // ensure that references, literals and functions keep their surrounding parentheses
  // (is for expressions the case anyway)
  delete expr.$parens;
  const op = { val: 'xpr', location: this.startLocation() };
  const location = { __proto__: Location.prototype, ...expr.location };
  return {
    op, args: [ expr ], location, $parens,
  };
}
/**
 * Record that `expr` is surrounded by the parentheses `open` … `close`: the
 * location of the parentheses is appended to `expr.$parens`, and a truthy
 * `expr.$opPrecedence` is reset to `null`.
 *
 * @param {object} expr
 * @param {object} open Token of the opening paren
 * @param {object} close Token of the closing paren
 * @param {boolean} [asQuery] If set, return `{ query: expr, location }`
 * @returns {object} `expr`, or the query wrapper with `asQuery`
 */
function surroundByParens( expr, open, close, asQuery = false ) {
  if (!expr)
    return expr;
  const location = this.tokenLocation( open, close );
  expr.$parens ||= [];
  expr.$parens.push( location );
  if (expr.$opPrecedence)
    expr.$opPrecedence = null;
  if (asQuery)
    return { query: expr, location };
  return expr;
}
/**
 * Return the source text covered by the tokens from `start` to `stop`
 * (inclusive), considering tokens on the default channel only.
 *
 * Adjacent tokens are concatenated directly; tokens which are not adjacent
 * in the source are separated by exactly one space.  Line breaks inside
 * tokens are normalized via `normalizeNewLine()` for _all_ tokens, including
 * the first one, so that the result does not depend on the line endings of
 * the source file.
 *
 * @param {object} start First token
 * @param {object} stop Last token
 * @returns {string} Empty string if there is no token on the default channel
 */
function tokensToStringRepresentation( start, stop ) {
  const tokens = this._input.getTokens(
    start.tokenIndex,
    stop.tokenIndex + 1, null
  ).filter(tok => tok.channel === antlr4.Token.DEFAULT_CHANNEL);
  if (tokens.length === 0)
    return '';
  let result = normalizeNewLine( tokens[0].text );
  for (let i = 1; i < tokens.length; ++i) {
    const str = normalizeNewLine( tokens[i].text );
    // a gap in the source positions means that something was in between
    result += (tokens[i].start > tokens[i - 1].stop + 1) ? ` ${ str }` : str;
  }
  return result;
}
/**
 * If `query` is surrounded by parentheses, wrap it into a unary operator node
 * with operator `val`, located at the outermost parentheses.  Otherwise,
 * return `query` unchanged.
 */
function unaryOpForParens( query, val ) {
  const parens = query?.$parens;
  if (!parens)
    return query;
  const location = parens[parens.length - 1];
  const op = { val, location };
  return { op, location, args: [ query ] };
}
// Warn if the current token is a ':' directly after the annotation `anno`;
// the warning is located at the annotation name.
// ANTLR on some OS might corrupt non-ASCII chars for messages
function warnIfColonFollows( anno ) {
  const { text } = this.getCurrentToken();
  if (text !== ':')
    return;
  const textArgs = { code: '@‹anno›', op: ':', newcode: '@(‹anno›…)' };
  this.warning( 'syntax-missing-parens', anno.name.location, textArgs,
                // eslint-disable-next-line @stylistic/js/max-len
                'When $(CODE) is followed by $(OP), use $(NEWCODE) for annotation assignments at this position' );
}
// If the token before the current one is a doc comment (ignoring other tokens
// on the hidden channel), put its "cleaned-up" text as value of property `doc`
// of arg `node` (which could be an array).  Complain if `doc` is already set.
//
// The doc comment token is not a non-hidden token for the following reasons:
//  - misplaced doc comments would lead to a parse error (incompatible),
//  - would influence the prediction, probably even induce adaptivePredict() calls,
//  - is only slightly "more declarative" in the grammar.
function docComment( node ) {
  const { DocComment } = this.constructor;
  const token = this._input.getHiddenTokenToLeft( DocComment );
  if (!token)
    return;
  // This token is actually used by / assigned to an artifact.
  token.isUsed = true;
  const { docComment: docOption } = this.options;
  // With explicit docComment:false, we don't emit a warning.
  const isOverwriting = node.doc && docOption !== false;
  if (isOverwriting) {
    this.warning( 'syntax-duplicate-doc-comment', node.doc.location, {},
                  'Doc comment is overwritten by another one below' );
  }
  // Either store the doc comment or a marker that there is one.
  const val = docOption ? parseDocComment( token.text ) : true;
  node.doc = this.valueWithTokenLocation( val, token );
}
/**
 * Classify token (identifier category) for implicit names.  To be used in the
 * empty alternative to AS <explicitName>.  If `ref` is given, uses the last
 * path segment's `tokenIndex`; otherwise the previous token is classified.
 * The return value can be used to reset the token's category, e.g. for
 * inline select items.
 *
 * @param {string} category
 * @param [ref]
 * @returns {{token: object, previous: any}|null} `null` if `ref` has no path
 *   or the token has not been classified as identifier before
 */
function classifyImplicitName( category, ref ) {
  if (ref && !ref.path)
    return null;
  const tokenIndex = ref?.path.at(-1)?.location.tokenIndex;
  const token = (tokenIndex === undefined)
    ? this._input.LT(-1)
    : this._input.get( tokenIndex );
  const previous = token.isIdentifier;
  if (!previous)
    return null;
  token.isIdentifier = category;
  return { token, previous };
}
/**
 * Complain about an alias name `ast` which has been provided without keyword
 * `as`, unless the current token is a `.`.
 *
 * For `safe` positions, delimited identifiers and names which do not match
 * the pattern checked below, just a deprecation warning is issued; otherwise
 * a message with configurable severity.
 *
 * @returns `ast`
 */
function fragileAlias( ast, safe = false ) {
  if (this.getCurrentToken().text === '.')
    return ast;
  const textArgs = { keyword: 'as' };
  const text = 'Add keyword $(KEYWORD) in front of the alias name';
  const onlyDeprecated = safe || ast.$delimited || !/^[a-zA-Z][a-zA-Z_]+$/.test( ast.id );
  if (onlyDeprecated)
    this.warning( 'syntax-deprecated-auto-as', ast.location, textArgs, text );
  else // configurable error
    this.message( 'syntax-missing-as', ast.location, textArgs, text );
  return ast;
}
// Return AST for identifier token `token`.  Also check that identifier is not empty.
//
// The token is classified with `category` (property `isIdentifier`).  The
// result has the properties `id` and `location` (with `tokenIndex`), and
// `$delimited: true` for the delimited forms `![…]` and `"…"`.  Unless
// `noTokenTypeCheck` is set, the `id` is empty for tokens which are neither
// identifiers nor consist of letters/underscores only.
function identAst( token, category, noTokenTypeCheck = false ) {
  const { Identifier } = this.constructor;
  const identLike = /^[a-zA-Z_]+$/;
  let tok = token;
  if (!tok) {                     // for rule identAst
    const { start, stop } = this._ctx; // token.tokenIndex
    // - correct parsing: start = stop
    // - singleTokenDeletion(), e.g. with `| Ident`: start < stop → stop
    // - after recoverInline: start > stop (!) → stop = the previous token, if it is
    //   ident-like and the one before not in `.@#`, → start ('') otherwise
    const useStart = start.tokenIndex > stop.tokenIndex &&
      (stop.type !== Identifier && !identLike.test( stop.text ) ||
       [ '.', '@', '#' ].includes( this._input.LT(-2)?.text ));
    tok = useStart ? start : stop;
  }
  tok.isIdentifier = category;
  const { text } = tok;
  const isAccepted = noTokenTypeCheck || tok.type === Identifier || identLike.test( text );
  let id = isAccepted ? text : '';
  let delimited = false;
  if (text[0] === '!') {
    // ![…] with `]]` as escaped `]`
    id = id.slice( 2, -1 ).replace( /]]/g, ']' );
    delimited = true;
    if (!id)
      this.message( 'syntax-invalid-name', tok, {} );
    // $delimited is used to complain about ![$self] and other magic vars usage;
    // we might complain about that already here via @arg{category}
  }
  else if (text[0] === '"') {
    // "…" with `""` as escaped `"`
    id = id.slice( 1, -1 ).replace( /""/g, '"' );
    delimited = true;
    if (!id) {
      this.message( 'syntax-invalid-name', tok, {} );
    }
    else {
      this.message( 'syntax-deprecated-ident', tok, { delimited: id },
                    // eslint-disable-next-line @stylistic/js/max-len
                    'Deprecated delimited identifier syntax, use $(DELIMITED) - strings are delimited by single quotes' );
    }
  }
  const location = this.tokenLocation( tok );
  location.tokenIndex = tok.tokenIndex;
  return (delimited)
    ? { id, $delimited: true, location }
    : { id, location };
}
/**
 * Issue message 'syntax-unexpected-many-one' if the path consists of the
 * single non-delimited name `many` or `one` (case-insensitive).
 */
function reportPathNamedManyOrOne( { path } ) {
  if (path.length !== 1)
    return;
  const item = path[0];
  if (item.$delimited)
    return;
  const lowered = item.id.toLowerCase();
  if (lowered !== 'many' && lowered !== 'one')
    return;
  this.message( 'syntax-unexpected-many-one', item.location,
                { code: item.id, delimited: item.id } );
}
/**
 * Issue message 'syntax-deprecated-ref-virtual' for the current token if the
 * token after it has a token type above `Number` up to `Identifier`, or is one
 * of `+`, `-`, `(`.
 */
function reportVirtualAsRef() {
  const { Number: numberType, Identifier } = this.constructor;
  const { type, text } = this._input.LT(2);
  const isNameLike = numberType < type && type <= Identifier;
  if (!isNameLike && ![ '+', '-', '(' ].includes( text ))
    return;
  // remark: we do not need to include 'not', as condition operators are only
  // allowed inside parentheses in the old parser
  const token = this._input.LT(1);
  const variant = (text === '(') ? 'func' : 'ref';
  this.message( 'syntax-deprecated-ref-virtual', token, {
    '#': variant,
    name: token.text,
    delimited: token.text,
  } );
}
/**
 * Warn about a missing `;` directly after the previous token, unless the next
 * token is `;`, `}`, EOF or has an empty text, or the previous token is `}`.
 */
function reportMissingSemicolon() {
  const next = this._input.LT(1);
  // '' is for ';' by insertSemicolon()
  if ([ ';', '', '}' ].includes( next.text ) || next.type === antlr4.Token.EOF)
    return;
  const previous = this._input.LT(-1);
  if (previous.text === '}')
    return;
  const offending = this.literalNames[next.type] || this.symbolicNames[next.type];
  const { file, endLine, endCol } = this.tokenLocation( previous );
  // better location after the previous token:
  const location = new Location( file, endLine, endCol );
  // it would be nicer to mention the doc comment if present, but not worth the
  // effort; 'syntax-missing-semicolon' already used
  this.warning( 'syntax-missing-proj-semicolon', location,
                { expecting: [ "';'" ], offending },
                'Missing $(EXPECTING) before $(OFFENDING)');
}
/**
 * Append the previous token to `args` as a "token literal": the value is the
 * lower-cased token text.
 *
 * @param {object[]} args
 */
function pushXprToken( args ) {
  const previous = this._input.LT(-1);
  const location = this.tokenLocation( previous );
  const val = previous.text.toLowerCase(); // TODO: remove toLowerCase() ?
  args.push( { location, val, literal: 'token' } );
}
/**
 * Turn the reference `ref` into a function call or a method-call expression
 * if its path contains function arguments; otherwise return `ref` unchanged.
 *
 *  - A path with a single item becomes a `call` node if the item has
 *    arguments (not in `:` syntax) or is a non-delimited name listed in
 *    `functionsWithoutParentheses`.
 *  - A longer path whose first function is at index `firstFunc` becomes an
 *    `ixpr` node: the optional leading reference `path[0..firstFunc-1]`,
 *    followed by `.` tokens and `call` nodes for all remaining path items.
 *
 * @param {object} ref
 * @returns {object}
 */
function valuePathAst( ref ) {
  // TODO: XSN representation of functions is a bit strange - rework
  const { path } = ref;
  if (!path || path.broken)
    return ref;
  if (path.length === 1) {
    const { args, id, location } = path[0];
    if (args
      ? path[0].$syntax === ':'
      : path[0].$delimited || !functionsWithoutParentheses.includes( id.toUpperCase() ))
      return ref;
    const implicit = this.previousTokenAtLocation( location );
    if (implicit && implicit.isIdentifier)
      implicit.isIdentifier = 'func';
    const filter = path[0].cardinality || path[0].where;
    if (filter)
      this.message( 'syntax-unexpected-filter', filter.location, {} );
    const op = { location, val: 'call' };
    return (args)
      ? {
        op, func: ref, location: ref.location, args,
      }
      : { op, func: ref, location: ref.location };
  }
  // $syntax === ':' => path(P: 1)
  // $syntax !== ':' => path(P => 1) or path(1) or path()
  const firstFunc = path.findIndex( i => i.args && i.$syntax !== ':' );
  if (firstFunc === -1) // also covers empty paths
    return ref;
  // Method Call ---------------------------
  // Transform the path into `.`-operators.
  // Everything after the first function is also a function, and not a reference.
  for (let i = firstFunc; i < path.length; ++i) {
    if (path[i].args && path[i].$syntax === ':') {
      // Error for `a(P => 1).b.c(P: 1)`: no ref after function.
      this.$messageFunctions.error('syntax-invalid-ref', path[i].args[$location], {
        code: '=>',
      }, 'References after function calls can\'t be resolved. Use $(CODE) in function arguments');
      break;
    }
    const filter = path[i].cardinality || path[i].where;
    if (filter)
      this.message( 'syntax-unexpected-filter', filter.location, {} );
  }
  const args = [];
  if (firstFunc > 0) {
    // The leading reference only covers the path items in front of the first
    // function.  `locUtils.combinedLocation()` is called with nodes having a
    // `location` property, like everywhere else in this file.
    args.push({
      path: path.slice(0, firstFunc),
      location: locUtils.combinedLocation(path[0], path[firstFunc - 1]),
    });
  }
  const pathRest = path.slice(firstFunc);
  for (const method of pathRest) {
    if (method !== pathRest[0] || firstFunc > 0) {
      args.push({
        // TODO: Update parser to have proper location for `.`?
        location: weakLocation(method.location),
        val: '.',
        literal: 'token',
      });
    }
    const func = {
      op: { location: method.location, val: 'call' },
      func: { path: [ method ] },
      location: method.location,
    };
    if (method.args)
      func.args = method.args;
    args.push(func);
  }
  return {
    op: {
      val: 'ixpr',
      location: this.startLocation(),
    },
    args,
    location: ref.location,
  };
}
/**
 * Adds the first argument of `args` ('new' keyword) to the second argument, if it's a method-ixpr.
 * Afterwards, `args` just contains that ixpr.
 *
 * @todo Cleanup, remove.
 * @param args
 */
function fixNewKeywordPlacement( args ) {
  // TODO: Currently, the parser creates an args-array with `new` and an `ixpr` for
  //       `new P().abc()`.  That is, "new" is separate from the methods.
  //       This function tries to work around it, but it's more of a hack.
  if (args.length !== 2)
    return;
  const [ newKeyword, ixpr ] = args;
  if (!ixpr.args || ixpr.op?.val !== 'ixpr')
    return;
  ixpr.args.unshift( newKeyword );
  // replace the content of `args` in place
  args.splice( 0, args.length, ixpr );
}
/**
 * Use the expression `cond` as value of the annotation `assignment`: all its
 * properties are copied to `assignment`, and the source text between the
 * tokens `start` and `stop` is stored in property `$tokenTexts`.
 * Does nothing if there is no `cond` (parse error).
 */
function expressionAsAnnotationValue( assignment, cond, start, stop ) {
  if (cond) {
    Object.assign( assignment, cond );
    assignment.$tokenTexts = this.tokensToStringRepresentation( start, stop );
  }
}
// If a '-' is directly before an unsigned number, consider it part of the number;
// otherwise (including for '+'), represent it as extra unary prefix operator.
//
// `args` contains the sign only; it is modified in place: either the sign is
// replaced by the negated number literal `expr`, or `expr` is appended.
function signedExpression( args, expr ) {
  // if (args.length !== 1) throw new CompilerAssertion()
  const sign = args[0];
  let negated = false;
  const isMinusDirectlyBeforeNumber = sign.val === '-' &&
    expr && // expr may be null if `-` rule can't be parsed
    expr.literal === 'number' &&
    sign.location.endLine === expr.location.line &&
    sign.location.endCol === expr.location.col;
  if (isMinusDirectlyBeforeNumber) {
    // numbers are either represented as JS number or as string
    const candidate = (typeof expr.val === 'number')
      ? expr.val >= 0 && -expr.val
      : !expr.val.startsWith('-') && `-${ expr.val }`;
    negated = candidate || false;
  }
  if (negated === false) {
    args.push( expr );
    return;
  }
  expr.val = negated;
  --expr.location.col; // the literal now starts at the sign
  args[0] = expr;
}
/**
 * Return number literal (XSN) for the previous (number) token with optional
 * token `sign`.  Represent the number as a JS number in property `val` if the
 * number can safely be represented as one.  Represent the number by a string,
 * the normalized token lexeme, if the stringified version of the number does
 * not match it.
 *
 * A message is issued if a keyword/identifier directly follows a number
 * token without space.  With a `sign`, the location starts at the sign.
 *
 * TODO: Always use text !== `${ num }`
 *
 * @param {object} [sign] Token of the sign; might be omitted
 * @param {string} [text] Defaults to the text of the previous token
 */
function numberLiteral( sign, text = this._input.LT(-1).text ) {
  const token = this._input.LT(-1);
  let location = this.tokenLocation( token );
  const nextToken = this._input.LT(1);
  if (token.type === this.constructor.Number &&
      token.stop + 1 === nextToken.start &&
      (nextToken.type === this.constructor.Identifier ||
       nextToken.type < this.constructor.Identifier && /^[a-z]+$/i.test( nextToken.text ))) {
    this.message('syntax-expecting-space', nextToken, {},
                 'Expecting a space between a number and a keyword/identifier');
  }
  if (sign) {
    const { endLine, endCol } = location;
    location = this.startLocation( sign );
    location.endLine = endLine;
    location.endCol = endCol;
    text = sign.text + text;
    this.reportUnexpectedSpace( sign, this.tokenLocation( token ) );
  }
  const num = Number.parseFloat( text || '0' ); // not Number.parseInt() !
  const normalized = normalizeNumberString(text);
  // The sign is optional: it must not be dereferenced without a check.
  const signText = sign ? sign.text : '';
  if (normalized !== `${ num }` && normalized !== `${ signText }${ num }`)
    return { literal: 'number', val: normalized, location };
  return { literal: 'number', val: num, location };
}
/**
 * Return a number literal (XSN) for the token just matched, i.e. `LT(-1)`.
 * If the number is not a safe integer in JS, emit an error: variant `unsafe` if the
 * lexeme consists of digits only, variant `normal` otherwise.  If the lexeme has the
 * form `‹digits›.‹digits›` but is nevertheless a safe integer (like `10.0`), emit an
 * info message located at the decimal point.
 *
 * @returns {{literal: string, val: number, location: object}}
 */
function unsignedIntegerLiteral() {
  const token = this._input.LT(-1);
  const location = this.tokenLocation( token );
  const text = token.text || '0';
  const num = Number.parseFloat( text ); // not Number.parseInt() !

  if (Number.isSafeInteger( num )) {
    // More restrictive check: 10.0 emits a message, because we don't expect
    // any decimal places.
    if (/^\d+[.]\d+$/.test( text )) {
      const col = location.col + text.indexOf( '.' );
      this.info( 'syntax-ignoring-decimal', { ...location, col, endCol: col + 1 } );
    }
  }
  else {
    const variant = /^\d*$/.test( text ) ? 'unsafe' : 'normal';
    this.error( 'syntax-expecting-unsigned-int', token, { '#': variant } );
  }
  return { literal: 'number', val: num, location };
}
// Copy all properties of `value` into the annotation `anno`.  Before doing so, a
// CDL number which is kept as string in `value.val` is converted to a JS number,
// as we really try to represent the provided CDL number as JSON number.  We give
// a warning if the conversion changes the relevant digits (variant `rounded`) or
// if the result is not a finite number (variant `infinite`); in the latter case,
// the string is kept.
function assignAnnotationValue( anno, value ) {
  const raw = value.val;
  const isNumberAsString = value.literal === 'number' && typeof raw !== 'number';
  if (isNumberAsString) {
    // a number in CDL, but stored as string in `val` - due to rounding or scientific notation
    const parsed = Number.parseFloat( raw || '0' );
    const isInfinite = !Number.isFinite( parsed );
    const result = isInfinite ? raw : parsed;
    if (isInfinite || relevantDigits( raw ) !== relevantDigits( result.toString() )) {
      const variant = isInfinite ? 'infinite' : 'rounded';
      this.warning( 'syntax-invalid-anno-number', value.location,
                    { '#': variant, rawvalue: raw, value: result },
                    {
                      std: 'Annotation number $(RAWVALUE) is put as $(VALUE) into the CSN',
                      rounded: 'Annotation number $(RAWVALUE) is rounded to $(VALUE)',
                      // eslint-disable-next-line @stylistic/js/max-len
                      infinite: 'Annotation value $(RAWVALUE) is infinite as number and put as string into the CSN',
                    } );
    }
    value.val = result;
  }
  Object.assign( anno, value );
}
// Return the relevant digits of the number string `val`: the digits of the mantissa
// without decimal point, without sign and without leading and trailing zeroes.  The
// exponent (if any) is ignored, i.e. the result says nothing about the magnitude of
// the number.  Two spellings of the same number give the same result, e.g. `10.0`,
// `10` and `1e1` all give `1`.
function relevantDigits( val ) {
  // The decimal point must be removed _before_ the trailing zeroes are stripped,
  // otherwise `10.0` would give `10`, but `10` would give `1`.
  const digits = val
    // We know the value does not contain newlines, hence the RegEx is safe.
    // eslint-disable-next-line sonarjs/slow-regex
    .replace( /e.+$/i, '' )
    .replace( /\./, '' )
    .replace( /^[-+0]+/, '' );
  // To avoid the super-linear RegEx `0+$`, strip the trailing zeroes manually.
  let end = digits.length;
  while (end > 0 && digits[end - 1] === '0')
    --end;
  return digits.slice( 0, end );
}
// Create AST node for quoted literals like string and e.g. date'2017-02-22'.
// The text of a back-quoted token is parsed as multi-line string; for all other
// tokens, the value is the text between the first and the last quote, with doubled
// quotes being un-escaped.  If no `literal` is provided, it is the lower-cased
// token text before the first quote.
// This function might issue a message and might change the `literal` and
// `val` property according to `quotedLiteralPatterns`.
function quotedLiteral( token, literal ) {
  /** @type {CSN.Location} */
  const location = this.tokenLocation( token );
  const { text } = token;
  let pos;
  let val;
  if (!text.startsWith( '`' )) {
    pos = text.search( '\'' ) + 1; // pos of char after quote
    val = text.slice( pos, -1 ).replace( /''/g, '\'' );
  }
  else {
    val = this.parseMultiLineStringLiteral( token );
    literal = 'string';
  }
  literal = literal || text.slice( 0, pos - 1 ).toLowerCase();

  // Column of the char at index `i` of the value; only used with single-line strings.
  const atChar = i => location.col + pos + i;

  const pattern = quotedLiteralPatterns[literal] || {};
  if (pattern.test_fn && !pattern.test_fn( val ) && !this.options.parseOnly)
    this.warning( 'syntax-invalid-literal', location, { '#': pattern.test_variant } );

  const badIndex = pattern.unexpected_char ? val.search( pattern.unexpected_char ) : -1;
  if (badIndex > -1) {
    // a quote in the value stands for two chars in the source
    const width = (val[badIndex] === '\'') ? 2 : 1;
    const charLocation = {
      file: location.file,
      line: location.line,
      endLine: location.line,
      col: atChar( badIndex ),
      endCol: atChar( badIndex + width ),
    };
    this.warning( 'syntax-invalid-literal', charLocation, { '#': pattern.unexpected_variant } );
  }

  return {
    literal: pattern.literal || literal,
    val: pattern.normalize && pattern.normalize( val ) || val,
    location,
  };
}
// Append the identifier node `ident` to the array `path`.  If there is no `ident`,
// mark the path as broken instead.  With a `prefix` token, its text is put in front
// of the `id`, the location of `ident` starts where the prefix starts, and
// whitespace between prefix and identifier is reported.
function pushIdent( path, ident, prefix ) {
  if (!ident) {
    path.broken = true;
    return;
  }
  if (prefix) {
    const { location } = ident;
    const { line, col } = this.reportUnexpectedSpace( prefix, location );
    location.line = line;
    location.col = col;
    ident.id = prefix.text + ident.id;
  }
  path.push( ident );
}
// Append `val` to `array`; a `null` or `undefined` value is not appended, the
// array is marked as broken instead.  Does nothing if there is no `array`.
function pushItem( array, val ) {
  if (array) {
    if (val === null || val === undefined)
      array.broken = true;
    else
      array.push( val );
  }
}
// For :param, #variant, #symbol, @(…) and @Begin and `@` inside annotation paths:
// issue a message if the token `prefix` (default: previous token) does not end
// exactly where `location` (default: location of the next token) starts.  The
// message is located at the text in-between.  With `isError`, the message is issued
// via `this.message` with a different message id, otherwise as warning.
// Returns the location of `prefix`.
function reportUnexpectedSpace( prefix = this._input.LT(-1),
                                location = this.tokenLocation( this._input.LT(1) ),
                                isError = false ) {
  const prefixLoc = this.tokenLocation( prefix );
  const isAdjacent = prefixLoc.endLine === location.line &&
                     prefixLoc.endCol === location.col;
  if (isAdjacent)
    return prefixLoc;

  const wsLocation = {
    file: location.file,
    line: prefixLoc.endLine, // !
    col: prefixLoc.endCol, // !
    endLine: location.line,
    endCol: location.col,
  };
  const textParams = { op: prefix.text };
  if (isError) {
    this.message( 'syntax-invalid-space', wsLocation, textParams,
                  'Delete the whitespace after $(OP)' );
  }
  else {
    this.warning( 'syntax-unexpected-space', wsLocation, textParams,
                  'Delete the whitespace after $(OP)' );
  }
  return prefixLoc;
}
// Add new definition `art` to dictionary property `env` of node `parent`.
// Return `art`.
//
// If argument `kind` is provided (truthy), set `art.kind` to that value.
// If argument `name` is provided, set `art.name`:
// - if `name` is an array, `name.id` consist of the ID of the last array item
//   (for elements via columns, foreign keys, table aliases); a missing last item
//   leads to an empty ID without location
// - if `name` is an object, `name.id` is either set, or the (local) name is calculated
//   from the IDs of all items in `name.path` (for main artifact definitions).
// Without `name`, the definition is added with an empty ID.
//
// For the dictionaries `artifacts` and `vocabularies`, definitions with the same
// name are collected in an array.  For other dictionaries, a duplicate is reported
// by this function if `kind` is falsy (and option `parseOnly` is not set); the
// message then depends on the value of `kind`:
// - `0`: duplicate argument
// - `''`: duplicate name after `excluding`
// - otherwise: duplicate structure property
function addDef( art, parent, env, kind, name ) {
  if (Array.isArray( name )) {
    // `last` is `0` for an empty array, and might be nullish for a corrupted path
    const last = name.length && name[name.length - 1];
    art.name = { // A.B.C -> 'C'
      id: last?.id || '', location: last?.location, $inferred: 'as',
    };
  }
  else if (name) {
    art.name = name;
    if (!name.id && kind === null) // namedValue, fortunately no `variant` there
      art.name.id = pathName( art.name?.path );
  }
  else {
    art.name = { id: '' };
  }
  if (kind)
    art.kind = kind;

  const id = art.name?.id || pathName( art.name?.path ); // returns '' for corrupted name
  if (env === 'artifacts' || env === 'vocabularies') {
    dictAddArray( parent[env], id, art );
  }
  else if (kind || this.options.parseOnly) { // TODO: do not check parseOnly
    dictAdd( parent[env], id, art );
  }
  else {
    dictAdd( parent[env], id, art, ( duplicateName, loc ) => {
      // do not use function(), otherwise `this` is wrong:
      if (kind === 0) {
        this.error( 'syntax-duplicate-argument', loc, { name: duplicateName },
                    'Duplicate value for parameter $(NAME)' );
      }
      else if (kind === '') {
        this.error( 'syntax-duplicate-excluding', loc,
                    { name: duplicateName, keyword: 'excluding' } );
      }
      else {
        this.error( 'syntax-duplicate-property', loc, { name: duplicateName },
                    'Duplicate value for structure property $(NAME)' );
      }
    } );
  }
  return art;
}
// Set property `kind` of the new definition `art` to the provided value and append
// the definition to the array property `env` of node `parent`.  Returns `art`.
function addItem( art, parent, env, kind ) {
  Object.assign( art, { kind } );
  const items = parent[env];
  items.push( art );
  return art;
}
/**
 * Add `annotate/extend Main.Artifact:elem.sub` to `‹xsn›.extensions`:
 * - the array item is an extend/annotate for `Main.Artifact`,
 * - for each path item in `elem.sub`, a fresh `elements` dictionary is set on the
 *   current extend/annotate, containing one extend/annotate for that element,
 * - the deepest extend/annotate is `ext`, the object which is to be extended.
 *
 * Without a usable element path (no array, empty, or marked as broken), `ext` itself
 * is the array item for `Main.Artifact`.
 *
 * @param {object} ext The object containing the location and annotations for the extension.
 * @param {object} parent The parent containing the `extensions` property, i.e. the source.
 * @param {string} kind Either `annotate` or `extend`.
 * @param {object} artName The "name object" for `Main.Artifact`.
 * @param {XSN.Path} elemPath Path as returned by `simplePath` rule.
 */
function addExtension( ext, parent, kind, artName, elemPath ) {
  const hasElemPath = Array.isArray( elemPath ) && elemPath.length && !elemPath.broken;
  if (!hasElemPath) {
    ext.name = artName;
    this.addItem( ext, parent, 'extensions', kind );
    return;
  }
  // Note: the element extensions share a common `location`, also with the
  // extension of the main artifact; its end location will usually set later
  const { location } = ext;
  const target = elemPath[elemPath.length - 1];
  let current = this.addItem( { name: artName, location }, parent, 'extensions', kind );
  for (const step of elemPath) {
    current.elements = Object.create( null ); // no dict location → no createDict()
    const elemExt = (step === target) ? ext : { location };
    current = this.addDef( elemExt, current, 'elements', kind, step );
  }
}
// Return a new dictionary (object without prototype) whose `$location` symbol
// property is the start location of the previous token.
// Must be in action directly after having parsed '{', '(`, or a keyword before.
function createDict() {
  const start = this.startLocation( this._input.LT(-1) );
  return Object.assign( Object.create( null ), { [$location]: start } );
}
// Return a new empty array whose `$location` symbol property is the start location
// of the previous token.
// Must be in action directly after having parsed '[' or '(` or `{`.
function createArray() {
  const start = this.startLocation( this._input.LT(-1) );
  return Object.assign( [], { [$location]: start } );
}
// Set the end of the `$location` of the dictionary or array `dict` to the end of
// the previous token; does nothing if `dict` has no such location.
// Must be in action directly after having parsed '}' or ')`.
function finalizeDictOrArray( dict ) {
  const location = dict[$location];
  if (location) {
    const { line, column, start, stop } = this._input.LT(-1);
    location.endLine = line;
    location.endCol = stop - start + column + 2;
  }
}
// Automatic semicolon insertion: if the type of the current token of the token
// stream is listed in `this.topLevelKeywords`, insert a synthetic `;` token directly
// after the previously matched token and make it the current token of the stream.
// Does nothing for all other tokens.
function insertSemicolon() {
const currentToken = this._input.tokens[this._input.index];
const requireSemicolon = this.topLevelKeywords.includes(currentToken.type);
if (requireSemicolon) {
this.noAssignmentInSameLine();
const prev = this._input.LT(-1);
// The synthetic token has the token type of the literal `';'`, an empty text,
// and uses the stop offset (as start and stop), the line and the column of `prev`.
const t = CommonTokenFactory.create(
currentToken.source,
this.literalNames.indexOf( "';'" ),
'', antlr4.Token.DEFAULT_CHANNEL,
prev.stop, prev.stop,
prev.line, prev.column
);
// Insert the new token into the token array at the position after `prev`:
t.tokenIndex = prev.tokenIndex + 1;
this._input.tokens.splice(t.tokenIndex, 0, t);
// Update tokenIndex: There could have been comments between two non-hidden tokens.
// All tokens after the inserted one have moved by one position.
for (let tokenIndex = t.tokenIndex + 1; tokenIndex < this._input.tokens.length; tokenIndex++)
this._input.tokens[tokenIndex].tokenIndex += 1;
// Continue parsing with the inserted token:
this._input.index = t.tokenIndex;
}
}
// Return a new `XsnSource` instance (class from `../compiler/xsn-model`),
// constructed without arguments.
function createSource() {
return new XsnSource();
}
// Precedences of the binary operators handled by `leftAssocBinaryOp`, looked up
// with the lower-cased operator text.  An operator with a higher number binds
// more tightly: `leftAssocBinaryOp` attaches it to the right-most operand of an
// expression whose operator has a lower number.
const operatorPrecedences = {
// query:
union: 1,
except: 1,
minus: 1,
intersect: 2,
};
// Combine the expression `expr` parsed so far with the new operand `right` via the
// left-associative binary operator of token `opToken`; returns the resulting
// expression, which is `expr` itself if there is no `right`.
// - If the optional token `eToken` is provided, its lower-cased text is attached
//   as property `extraProp` to the new operator node.
// - If the un-parenthesized `expr` already has the same operator (with the same
//   value for `extraProp`), `right` is just appended to its `args`.
// - If the new operator has a higher precedence than the operator of `expr`, the
//   new node is created for the right-most operand of `expr` and replaces it.
function leftAssocBinaryOp( expr, right, opToken, eToken, extraProp ) {
  if (!right)
    return expr;

  const lowerCased = token => this.valueWithTokenLocation( token.text.toLowerCase(), token );
  const op = lowerCased( opToken );
  const extra = eToken ? lowerCased( eToken ) : undefined;

  const continuesExpr = !expr.$parens &&
    expr.op?.val === op.val &&
    expr[extraProp]?.val === extra?.val;
  if (continuesExpr) {
    expr.args.push( right );
    return expr;
  }

  // Walk down the right-most operands as long as the new operator binds tighter:
  const precedence = operatorPrecedences[op.val] || 0;
  let operand = expr;
  let container = null; // the `args` array having `operand` as last item
  while (precedence > nodePrecedence( operand )) {
    container = operand.args;
    operand = container[container.length - 1];
  }

  // TODO: location correct?
  const extraPart = extra ? { [extraProp]: extra } : {};
  const node = {
    op,
    ...extraPart,
    args: [ operand, right ],
    location: operand.location,
  };
  if (!container)
    return node;
  container[container.length - 1] = node;
  return expr;
}
// Return the precedence of the operator of `node` according to
// `operatorPrecedences`.  Nodes without operator, nodes in parentheses and nodes
// with an operator not listed there have the precedence `Infinity`.
function nodePrecedence( node ) {
  const { op } = node;
  if (!op || node.$parens)
    return Infinity;
  return operatorPrecedences[op.val] || Infinity;
}
// Register an operator with the given `precedence` in the expression whose arguments
// are collected in array `args`, and return the array to which the operator has been
// added via `pushXprToken` and to which the next operand is to be added.  An args
// array remembers the precedence of its operator in property `$opPrecedence`.
function pushOpToken( args, precedence ) { // for nary only; uses LT(-1) as operator token
let node = null;
let left = args;
// Walk down the last operands as long as their args array has an operator with a
// lower `$opPrecedence` than the new one; `args` is the array containing `node`:
while (left?.$opPrecedence && left.$opPrecedence < precedence) {
args = left;
node = args[args.length - 1]; // last sub node of left side
left = node.args;
}
if (left?.$opPrecedence === precedence ) { // nary
// same precedence: continue with the args array found
args = left;
}
else if (node) {
// The last operand `node` becomes the first argument of a new sub expression,
// which replaces it; the placeholder `null` is dropped from the new args array.
const sub = this.argsExpression( [ node, null ], true );
args[args.length - 1] = sub;
args = sub.args;
args.length = 1;
}
else if (args.length > 1) { // new top-level op & op on left
// A copy of the arguments collected so far becomes the only (first) argument:
args[0] = this.argsExpression( [ ...args ], args.$opPrecedence != null ); // finish expression
args.length = 1;
}
args.$opPrecedence = precedence;
// TODO (if necessary): `location` for sub expressions, top-level is properly set
this.pushXprToken( args );
return args;
}
// Return the expression for the arguments `args`: the only argument itself if
// there is exactly one, otherwise a new operator node (after having called
// `attachLocation` on it).  The value of the operator is `?:` if that is the value
// of `nary`, `nary` for any other truthy value of `nary`, and `ixpr` otherwise.
// The start location is the one of the first argument, or the one of its outermost
// parentheses.
// Only to be used in @after or via pushOpToken.
function argsExpression( args, nary ) {
  if (args.length === 1) // args.length === 0 is ok (for OVER…)
    return args[0];

  const first = args[0];
  const $parens = first?.$parens;
  const loc = ($parens) ? $parens[$parens.length - 1] : first?.location;
  const location = (loc)
    ? { __proto__: Location.prototype, ...loc }
    : this.startLocation();

  let val = 'ixpr';
  if (nary === '?:')
    val = nary;
  else if (nary)
    val = 'nary';
  // the operator and the node share the same location object
  return this.attachLocation( { op: { val, location }, args, location } );
}
// Keyword for a maximum cardinality value: `one` for `1`, `many` for `*`.
// NOTE(review): presumably used by `setMaxCardinality` below - confirm.
const maxCardinalityKeywords = { 1: 'one', '*': 'many' };
function setMaxCardinality( art, targetMax, token ) { // - val
if (token)
targetMa