@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
584 lines • 24.5 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.normalizeTreeSitterTreeToAst = normalizeTreeSitterTreeToAst;
exports.makeTreeSitterLax = makeTreeSitterLax;
exports.makeTreeSitterStrict = makeTreeSitterStrict;
const normalizer_data_1 = require("../ast/parser/main/normalizer-data");
const tree_sitter_types_1 = require("./tree-sitter-types");
const type_1 = require("../ast/model/type");
const retriever_1 = require("../../retriever");
const convert_values_1 = require("../convert-values");
const normalize_meta_1 = require("../ast/parser/main/normalize-meta");
const arrays_1 = require("../../../util/arrays");
const r_function_call_1 = require("../ast/model/nodes/r-function-call");
const strings_1 = require("../../../util/strings");
const config_1 = require("../../../config");
const log_1 = require("../../../util/log");
/**
 * Normalizes a tree-sitter parse tree into the normalized AST.
 *
 * Before converting, the strict or lax error handling is activated depending on
 * the `lax` flag of the configured `tree-sitter` engine.
 *
 * @param tree - The tree to normalize
 * @returns The converted root node, which is always an expression list
 * @throws ParseError if the root does not convert to an expression list
 */
function normalizeTreeSitterTreeToAst(tree) {
    const engineConfig = (0, config_1.getEngineConfig)('tree-sitter');
    // pick the error-handling mode once, then activate it
    const activateMode = engineConfig?.lax ? makeTreeSitterLax : makeTreeSitterStrict;
    activateMode();
    const normalized = convertTreeNode(tree.rootNode);
    if (normalized.type === type_1.RType.ExpressionList) {
        return normalized;
    }
    throw new normalizer_data_1.ParseError(`expected root to resolve to an expression list, got a ${normalized.type}`);
}
/**
 * Strict child access: a node flagged with `hasError` yields no children at all,
 * any other node yields its `children` unchanged.
 *
 * @param node - The syntax node whose children are requested
 * @returns An empty array for erroneous nodes, otherwise `node.children`
 */
function nonErrorChildrenStrict(node) {
    if (node.hasError) {
        return [];
    }
    return node.children;
}
/**
 * Lax child access: for a node flagged with `hasError`, only the children whose
 * type is the tree-sitter error type are dropped; all other children are kept.
 *
 * @param node - The syntax node whose children are requested
 * @returns `node.children` as is for clean nodes, otherwise a filtered copy without error nodes
 */
function nonErrorChildrenLax(node) {
    if (!node.hasError) {
        return node.children;
    }
    const isNotErrorNode = child => child.type !== tree_sitter_types_1.TreeSitterType.Error;
    return node.children.filter(isNotErrorNode);
}
// Currently active strategy for reading a node's children. It starts out strict and
// is re-assigned by makeTreeSitterLax() / makeTreeSitterStrict().
let nonErrorChildren = nonErrorChildrenStrict;
/**
 * Switches the normalizer to lax mode: logs the change and makes
 * `nonErrorChildren` point to the lax child accessor.
 */
function makeTreeSitterLax() {
    const message = '[Tree-Sitter] Lax parsing active';
    log_1.log.info(message);
    nonErrorChildren = nonErrorChildrenLax;
}
/**
 * Switches the normalizer to strict mode: logs the change and makes
 * `nonErrorChildren` point to the strict child accessor.
 */
function makeTreeSitterStrict() {
    const message = '[Tree-Sitter] Strict parsing active';
    log_1.log.info(message);
    nonErrorChildren = nonErrorChildrenStrict;
}
/**
 * Recursively converts a single tree-sitter syntax node into the corresponding
 * normalized AST node.
 *
 * Which children a node has, and in what order, is dictated by the grammar:
 * https://github.com/r-lib/tree-sitter-r/blob/main/grammar.js
 *
 * Children are read through `nonErrorChildren`, so the currently active
 * strict/lax strategy decides how nodes flagged with `hasError` are treated.
 *
 * @param node - The tree-sitter syntax node to convert
 * @returns The normalized node; its `type` is one of the `RType` values
 * @throws ParseError if the node's type is not handled by any case below
 */
function convertTreeNode(node) {
    const range = makeSourceRange(node);
    // info shared by most results: the node's full range and text, without extra tokens
    const defaultInfo = {
        info: {
            fullRange: range,
            additionalTokens: [],
            fullLexeme: node.text
        }
    };
    switch (node.type) {
        case tree_sitter_types_1.TreeSitterType.Program: {
            // comments are separated first and then attached to the converted children
            const [comments, children] = splitComments(nonErrorChildren(node));
            const body = children.map(n => [n, convertTreeNode(n)]);
            const remainingComments = linkCommentsToNextNodes(body, comments);
            return {
                type: type_1.RType.ExpressionList,
                children: body.map(n => n[1]),
                grouping: undefined,
                lexeme: undefined,
                info: {
                    // comments that could not be attached to any child stay with the list itself
                    additionalTokens: remainingComments.map(c => c[1])
                }
            };
        }
        case tree_sitter_types_1.TreeSitterType.BracedExpression:
        case tree_sitter_types_1.TreeSitterType.ParenthesizedExpression: {
            const [comments, children] = splitComments(nonErrorChildren(node));
            // the first and last non-comment children are the grouping tokens, everything in between is the body
            const opening = children[0];
            const body = children.slice(1, -1).map(n => [n, convertTreeNode(n)]);
            const remainingComments = linkCommentsToNextNodes(body, comments);
            const closing = children[children.length - 1];
            return {
                type: type_1.RType.ExpressionList,
                location: undefined,
                lexeme: undefined,
                children: body.map(n => n[1]),
                grouping: [
                    {
                        type: type_1.RType.Symbol,
                        location: makeSourceRange(opening),
                        content: (0, retriever_1.removeRQuotes)(opening.text),
                        lexeme: opening.text,
                        namespace: undefined,
                        ...defaultInfo
                    }, {
                        type: type_1.RType.Symbol,
                        location: makeSourceRange(closing),
                        content: (0, retriever_1.removeRQuotes)(closing.text),
                        lexeme: closing.text,
                        namespace: undefined,
                        ...defaultInfo
                    }
                ],
                info: {
                    additionalTokens: remainingComments.map(c => c[1])
                }
            };
        }
        case tree_sitter_types_1.TreeSitterType.BinaryOperator: {
            const children = nonErrorChildren(node);
            // operands are the outermost children; the operator (and any comments) sit in between
            const lhs = convertTreeNode(children[0]);
            const rhs = convertTreeNode(children[children.length - 1]);
            const [commentsBoth, [op]] = splitComments(children.slice(1, -1));
            const comments = commentsBoth.map(c => c[1]);
            const opSource = makeSourceRange(op);
            const lhsAsArg = {
                type: type_1.RType.Argument,
                location: lhs.location,
                value: lhs,
                name: undefined,
                lexeme: lhs.lexeme,
                info: {}
            };
            if (op.type === 'special') {
                // operators of type 'special' become a named function call with both operands as arguments
                return {
                    type: type_1.RType.FunctionCall,
                    location: opSource,
                    lexeme: node.text,
                    functionName: {
                        type: type_1.RType.Symbol,
                        location: opSource,
                        lexeme: op.text,
                        content: op.text,
                        namespace: undefined,
                        info: {}
                    },
                    arguments: [lhsAsArg, {
                            type: type_1.RType.Argument,
                            location: rhs.location,
                            value: rhs,
                            name: undefined,
                            lexeme: rhs.lexeme,
                            info: {}
                        }],
                    named: true,
                    infixSpecial: true,
                    info: {
                        additionalTokens: comments
                    }
                };
            }
            else if (op.text === '|>') {
                // the pipe wraps its left-hand side in an argument, the right-hand side stays as is
                return {
                    type: type_1.RType.Pipe,
                    location: opSource,
                    lhs: lhsAsArg,
                    rhs,
                    lexeme: op.text,
                    info: {
                        fullRange: range,
                        additionalTokens: comments,
                        fullLexeme: node.text
                    }
                };
            }
            else {
                return {
                    type: type_1.RType.BinaryOp,
                    location: opSource,
                    lhs, rhs,
                    operator: op.text,
                    lexeme: op.text,
                    info: {
                        fullRange: range,
                        additionalTokens: comments,
                        fullLexeme: node.text
                    }
                };
            }
        }
        case tree_sitter_types_1.TreeSitterType.UnaryOperator: {
            const [op, operand] = nonErrorChildren(node);
            return {
                type: type_1.RType.UnaryOp,
                operand: convertTreeNode(operand),
                location: makeSourceRange(op),
                operator: op.text,
                lexeme: op.text,
                ...defaultInfo
            };
        }
        case tree_sitter_types_1.TreeSitterType.NamespaceOperator: {
            // the result is the right-hand symbol, carrying the left-hand text as its namespace
            const [lhs, /* :: or ::: */ , rhs] = nonErrorChildren(node);
            return {
                type: type_1.RType.Symbol,
                location: makeSourceRange(rhs),
                content: rhs.text,
                lexeme: rhs.text,
                namespace: lhs.text,
                ...defaultInfo
            };
        }
        case '(':
        case ')':
        case tree_sitter_types_1.TreeSitterType.Na:
        case tree_sitter_types_1.TreeSitterType.Null:
        case tree_sitter_types_1.TreeSitterType.Dots:
        case tree_sitter_types_1.TreeSitterType.DotDotI:
        case tree_sitter_types_1.TreeSitterType.Identifier:
        case tree_sitter_types_1.TreeSitterType.Return:
            return {
                type: type_1.RType.Symbol,
                location: range,
                // surrounding backticks are stripped directly, everything else goes through removeRQuotes
                content: (0, strings_1.startAndEndsWith)(node.text, '`') ? node.text.slice(1, -1) : (0, retriever_1.removeRQuotes)(node.text),
                lexeme: node.text,
                namespace: undefined,
                ...defaultInfo
            };
        case tree_sitter_types_1.TreeSitterType.IfStatement: {
            const [ifNode, /* ( */ , condition, /* ) */ , then, /* else */ , ...otherwise] = nonErrorChildren(node);
            // drop else-statement nodes so that the first remaining node is the alternative branch
            const filteredOtherwise = otherwise.filter(n => n.type !== tree_sitter_types_1.TreeSitterType.ElseStatement);
            return {
                type: type_1.RType.IfThenElse,
                condition: convertTreeNode(condition),
                then: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(then)),
                otherwise: filteredOtherwise.length > 0 ? (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(filteredOtherwise[0])) : undefined,
                location: makeSourceRange(ifNode),
                lexeme: ifNode.text,
                ...defaultInfo
            };
        }
        case tree_sitter_types_1.TreeSitterType.ForStatement: {
            const children = nonErrorChildren(node);
            const forNode = children[0]; // followed by the opening parenthesis at index 1
            // the variable part starts at index 2 and runs up to the "in" token
            const variable = getNodesUntil(children, 'in', 2);
            // the sequence part starts right after the "in" token and runs up to the closing parenthesis
            const sequence = getNodesUntil(children, ')', 2 + variable.length + 1);
            // the body follows directly after the closing parenthesis
            const body = children[2 + variable.length + 1 + sequence.length + 1];
            const [variableComments, [variableNode]] = splitComments(variable);
            const [sequenceComments, [sequenceNode]] = splitComments(sequence);
            return {
                type: type_1.RType.ForLoop,
                variable: {
                    type: type_1.RType.Symbol,
                    location: makeSourceRange(variableNode),
                    content: (0, retriever_1.removeRQuotes)(variableNode.text),
                    lexeme: variableNode.text,
                    namespace: undefined,
                    info: {
                        fullRange: undefined,
                        additionalTokens: [],
                        fullLexeme: undefined
                    }
                },
                vector: convertTreeNode(sequenceNode),
                body: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(body)),
                location: makeSourceRange(forNode),
                lexeme: forNode.text,
                info: {
                    fullRange: range,
                    additionalTokens: [...variableComments, ...sequenceComments].map(c => c[1]),
                    fullLexeme: node.text
                }
            };
        }
        case tree_sitter_types_1.TreeSitterType.WhileStatement: {
            const [whileNode, /* ( */ , condition, /* ) */ , body] = nonErrorChildren(node);
            return {
                type: type_1.RType.WhileLoop,
                condition: convertTreeNode(condition),
                body: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(body)),
                location: makeSourceRange(whileNode),
                lexeme: whileNode.text,
                ...defaultInfo
            };
        }
        case tree_sitter_types_1.TreeSitterType.RepeatStatement: {
            const [repeatNode, body] = nonErrorChildren(node);
            return {
                type: type_1.RType.RepeatLoop,
                body: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(body)),
                location: makeSourceRange(repeatNode),
                lexeme: repeatNode.text,
                ...defaultInfo
            };
        }
        case tree_sitter_types_1.TreeSitterType.Call: {
            const [func, argsParentheses] = nonErrorChildren(node);
            // tree-sitter wraps next and break in a function call, but we don't, so unwrap
            if (func.type === tree_sitter_types_1.TreeSitterType.Next || func.type === tree_sitter_types_1.TreeSitterType.Break) {
                return {
                    ...convertTreeNode(func),
                    ...defaultInfo
                };
            }
            // strip the parentheses and split the remainder into one group per comma-separated argument
            const args = (0, arrays_1.splitArrayOn)(nonErrorChildren(argsParentheses).slice(1, -1), x => x.type === 'comma');
            const funcRange = makeSourceRange(func);
            const call = {
                // an empty group between two commas is an explicitly empty argument
                arguments: args.map(n => n.length === 0 ? r_function_call_1.EmptyArgument : convertTreeNode(n[0])),
                location: funcRange,
                lexeme: func.text,
                ...defaultInfo
            };
            if (func.type === tree_sitter_types_1.TreeSitterType.Identifier || func.type === tree_sitter_types_1.TreeSitterType.String || func.type === tree_sitter_types_1.TreeSitterType.NamespaceOperator || func.type === tree_sitter_types_1.TreeSitterType.Return) {
                let funcNode = convertTreeNode(func);
                if (funcNode.type === type_1.RType.String) {
                    // a string in function position is turned into a symbol with the quotes removed
                    funcNode = {
                        ...funcNode,
                        type: type_1.RType.Symbol,
                        namespace: undefined,
                        content: (0, retriever_1.removeRQuotes)(func.text)
                    };
                }
                return {
                    ...call,
                    type: type_1.RType.FunctionCall,
                    functionName: {
                        ...funcNode,
                        info: {
                            fullRange: range,
                            additionalTokens: [],
                            fullLexeme: node.text
                        }
                    },
                    named: true
                };
            }
            else {
                // anything else in function position is kept as the called expression of an unnamed call
                return {
                    ...call,
                    type: type_1.RType.FunctionCall,
                    calledFunction: convertTreeNode(func),
                    named: undefined
                };
            }
        }
        case tree_sitter_types_1.TreeSitterType.FunctionDefinition: {
            const [name, paramsParens, body] = nonErrorChildren(node);
            // read the parameter list through nonErrorChildren as well (like Call and Subset do),
            // so that the active strict/lax strategy also applies to error nodes between the parentheses
            const params = (0, arrays_1.splitArrayOn)(nonErrorChildren(paramsParens).slice(1, -1), x => x.type === 'comma');
            return {
                type: type_1.RType.FunctionDefinition,
                parameters: params.map(n => convertTreeNode(n[0])),
                body: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(body)),
                location: makeSourceRange(name),
                lexeme: name.text,
                ...defaultInfo
            };
        }
        case tree_sitter_types_1.TreeSitterType.String:
            return {
                type: type_1.RType.String,
                location: range,
                content: (0, convert_values_1.string2ts)(node.text),
                lexeme: node.text,
                ...defaultInfo
            };
        case tree_sitter_types_1.TreeSitterType.Float:
        case tree_sitter_types_1.TreeSitterType.Integer:
        case tree_sitter_types_1.TreeSitterType.Complex:
        case tree_sitter_types_1.TreeSitterType.Inf:
        case tree_sitter_types_1.TreeSitterType.Nan:
            return {
                type: type_1.RType.Number,
                location: range,
                content: (0, convert_values_1.number2ts)(node.text),
                lexeme: node.text,
                ...defaultInfo
            };
        case tree_sitter_types_1.TreeSitterType.True:
        case tree_sitter_types_1.TreeSitterType.False:
            return {
                type: type_1.RType.Logical,
                location: range,
                content: (0, convert_values_1.boolean2ts)(node.text),
                lexeme: node.text,
                ...defaultInfo
            };
        case tree_sitter_types_1.TreeSitterType.Break:
        case tree_sitter_types_1.TreeSitterType.Next:
            return {
                type: node.type === tree_sitter_types_1.TreeSitterType.Break ? type_1.RType.Break : type_1.RType.Next,
                location: range,
                lexeme: node.text,
                ...defaultInfo
            };
        case tree_sitter_types_1.TreeSitterType.Subset:
        case tree_sitter_types_1.TreeSitterType.Subset2: {
            // subset has children like a and [x]
            const [func, content] = nonErrorChildren(node);
            // bracket is now [ or [[ and argsClosing is x] or x]]
            const [bracket, ...argsClosing] = nonErrorChildren(content);
            // drop the closing bracket, then split the remainder on commas
            const args = (0, arrays_1.splitArrayOn)(argsClosing.slice(0, -1), x => x.type === 'comma');
            return {
                type: type_1.RType.Access,
                operator: bracket.text,
                accessed: convertTreeNode(func),
                access: args.map(n => n.length === 0 ? r_function_call_1.EmptyArgument : convertTreeNode(n[0])),
                location: makeSourceRange(bracket),
                lexeme: bracket.text,
                ...defaultInfo
            };
        }
        case tree_sitter_types_1.TreeSitterType.ExtractOperator: {
            const [lhs, operator, rhs] = nonErrorChildren(node);
            const rhsRange = makeSourceRange(rhs);
            return {
                type: type_1.RType.Access,
                operator: operator.text,
                accessed: convertTreeNode(lhs),
                // the right-hand side becomes the single, unnamed access argument
                access: [{
                        type: type_1.RType.Argument,
                        name: undefined,
                        value: {
                            ...convertTreeNode(rhs),
                            ...defaultInfo
                        },
                        location: rhsRange,
                        lexeme: rhs.text,
                        info: {
                            fullRange: rhsRange,
                            additionalTokens: [],
                            fullLexeme: rhs.text
                        }
                    }],
                location: makeSourceRange(operator),
                lexeme: operator.text,
                ...defaultInfo
            };
        }
        case tree_sitter_types_1.TreeSitterType.Parameter: {
            const children = nonErrorChildren(node);
            const name = children[0];
            const nameRange = makeSourceRange(name);
            let defaultValue = undefined;
            // exactly three children means the third one is converted as the default value
            if (children.length === 3) {
                defaultValue = convertTreeNode(children[2]);
            }
            return {
                type: type_1.RType.Parameter,
                name: {
                    type: type_1.RType.Symbol,
                    location: nameRange,
                    namespace: undefined,
                    content: name.text,
                    lexeme: name.text,
                    info: {
                        fullRange: range,
                        additionalTokens: [],
                        fullLexeme: name.text
                    }
                },
                special: name.text === '...',
                defaultValue,
                location: nameRange,
                lexeme: name.text,
                info: {
                    fullRange: range,
                    additionalTokens: [],
                    fullLexeme: name.text
                }
            };
        }
        case tree_sitter_types_1.TreeSitterType.Argument: {
            const children = nonErrorChildren(node);
            if (children.length === 1) {
                // a single child is an unnamed argument consisting only of its value
                const [arg] = children;
                return {
                    type: type_1.RType.Argument,
                    name: undefined,
                    value: convertTreeNode(arg),
                    location: range,
                    lexeme: node.text,
                    ...defaultInfo
                };
            }
            else {
                const [nameNode, /* = */ , valueNode] = children;
                let name = convertTreeNode(nameNode);
                // unescape argument names
                if (name.type === type_1.RType.String) {
                    name = {
                        ...name,
                        type: type_1.RType.Symbol,
                        content: name.content.str,
                        namespace: undefined
                    };
                }
                else if ((0, strings_1.startAndEndsWith)(name.content, '`')) {
                    name.content = name.content.slice(1, -1);
                }
                const nameRange = makeSourceRange(nameNode);
                return {
                    type: type_1.RType.Argument,
                    name: name,
                    // the value node may be absent, in which case the value stays undefined
                    value: valueNode ? convertTreeNode(valueNode) : undefined,
                    location: nameRange,
                    lexeme: nameNode.text,
                    info: {
                        fullRange: nameRange,
                        additionalTokens: [],
                        fullLexeme: nameNode.text
                    }
                };
            }
        }
        case tree_sitter_types_1.TreeSitterType.Comment:
            return {
                type: type_1.RType.Comment,
                location: range,
                // the content is the comment text without its first character
                content: node.text.slice(1),
                lexeme: node.text,
                ...defaultInfo
            };
        default:
            throw new normalizer_data_1.ParseError(`unexpected node type ${node.type} at ${JSON.stringify(range)}`);
    }
}
/**
 * Builds the source range `[startLine, startColumn, endLine, endColumn]` for a syntax node.
 *
 * @param node - A node providing `startPosition` and `endPosition`, each with `row` and `column`
 * @returns The four-element range array
 */
function makeSourceRange(node) {
    // tree-sitter is 0-based but we want 1-based
    const startLine = node.startPosition.row + 1;
    const startColumn = node.startPosition.column + 1;
    const endLine = node.endPosition.row + 1;
    // tree-sitter's end position is one off from ours, so the end column is taken as is
    const endColumn = node.endPosition.column;
    return [startLine, startColumn, endLine, endColumn];
}
/**
 * Partitions syntax nodes into comments and everything else, keeping their relative order.
 *
 * @param nodes - The syntax nodes to partition
 * @returns A pair `[comments, others]`; each comment entry is itself a pair of the
 *          original syntax node and the comment node created for it
 */
function splitComments(nodes) {
    const commentPairs = [];
    const remaining = [];
    for (const candidate of nodes) {
        if (candidate.type !== tree_sitter_types_1.TreeSitterType.Comment) {
            remaining.push(candidate);
            continue;
        }
        const converted = {
            type: type_1.RType.Comment,
            location: makeSourceRange(candidate),
            // the content is the comment text without its first character
            content: candidate.text.slice(1),
            lexeme: candidate.text,
            info: {
                additionalTokens: [],
                fullLexeme: candidate.text
            }
        };
        commentPairs.push([candidate, converted]);
    }
    return [commentPairs, remaining];
}
/**
 * Attaches each comment to one of the given converted nodes by pushing it onto
 * that node's `info.additionalTokens`.
 *
 * @param nodes    - Pairs of `[syntaxNode, convertedNode]` that comments may be attached to
 * @param comments - Pairs of `[commentSyntaxNode, commentNode]` to distribute
 * @returns The comment pairs that could not be attached to any node
 */
function linkCommentsToNextNodes(nodes, comments) {
    const unlinked = [];
    const isComment = candidate => candidate.type === tree_sitter_types_1.TreeSitterType.Comment;
    for (const [syntaxNode, commentNode] of comments) {
        let anchor;
        if (syntaxNode.previousSibling?.endIndex === syntaxNode.startIndex) {
            // the previous sibling ends exactly where the comment starts, so the comment belongs to it
            anchor = syntaxNode.previousSibling;
        }
        else {
            // otherwise look for the next sibling that is not a comment itself
            anchor = syntaxNode.nextSibling;
            while (anchor && isComment(anchor)) {
                anchor = anchor.nextSibling;
            }
        }
        const matching = anchor ? nodes.find(([candidate]) => candidate.equals(anchor)) : undefined;
        // if there is no valid sibling, we just link the comment to the first node (see normalize-expressions.ts)
        const target = (matching ?? nodes[0] ?? [])[1];
        if (!target) {
            unlinked.push([syntaxNode, commentNode]);
            continue;
        }
        target.info.additionalTokens ??= [];
        target.info.additionalTokens.push(commentNode);
    }
    return unlinked;
}
/**
 * Collects the consecutive nodes starting at `startIndex` up to, but excluding,
 * the first node whose type equals `type`.
 *
 * @param nodes      - The nodes to scan
 * @param type       - The node type that ends the collection
 * @param startIndex - Index of the first node to consider (defaults to 0)
 * @returns The collected nodes; all nodes from `startIndex` on if `type` never occurs
 */
function getNodesUntil(nodes, type, startIndex = 0) {
    let stop = startIndex;
    // advance to the first node of the requested type (or the end of the list)
    while (stop < nodes.length && nodes[stop].type !== type) {
        stop++;
    }
    return nodes.slice(startIndex, stop);
}
//# sourceMappingURL=tree-sitter-normalize.js.map