UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

584 lines 24.5 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.normalizeTreeSitterTreeToAst = normalizeTreeSitterTreeToAst;
exports.makeTreeSitterLax = makeTreeSitterLax;
exports.makeTreeSitterStrict = makeTreeSitterStrict;
const normalizer_data_1 = require("../ast/parser/main/normalizer-data");
const tree_sitter_types_1 = require("./tree-sitter-types");
const type_1 = require("../ast/model/type");
const retriever_1 = require("../../retriever");
const convert_values_1 = require("../convert-values");
const normalize_meta_1 = require("../ast/parser/main/normalize-meta");
const arrays_1 = require("../../../util/arrays");
const r_function_call_1 = require("../ast/model/nodes/r-function-call");
const strings_1 = require("../../../util/strings");
const config_1 = require("../../../config");
const log_1 = require("../../../util/log");
/**
 * Converts a tree-sitter parse tree into the normalized AST.
 *
 * Reads the `lax` flag of the 'tree-sitter' engine config and switches the module-wide
 * error-filtering strategy accordingly before converting the root node.
 *
 * @param tree - The tree to normalize
 * @returns the converted root node, which is always an expression list
 * @throws ParseError if the root does not resolve to an expression list, or if an
 *         unexpected node type is encountered during conversion
 */
function normalizeTreeSitterTreeToAst(tree) {
    const lax = (0, config_1.getEngineConfig)('tree-sitter')?.lax;
    if (lax) {
        makeTreeSitterLax();
    }
    else {
        makeTreeSitterStrict();
    }
    const root = convertTreeNode(tree.rootNode);
    if (root.type !== type_1.RType.ExpressionList) {
        throw new normalizer_data_1.ParseError(`expected root to resolve to an expression list, got a ${root.type}`);
    }
    return root;
}
/**
 * Strict strategy: a node that reports `hasError` contributes no children at all.
 */
function nonErrorChildrenStrict(node) {
    return node.hasError ? [] : node.children;
}
/**
 * Lax strategy: a node that reports `hasError` keeps all children except those of the error type.
 */
function nonErrorChildrenLax(node) {
    return node.hasError
        ? node.children.filter(n => n.type !== tree_sitter_types_1.TreeSitterType.Error)
        : node.children;
}
/** The currently active child-retrieval strategy (module-wide state, strict by default). */
let nonErrorChildren = nonErrorChildrenStrict;
/**
 * Activates lax parsing: error children are dropped, everything else is converted.
 */
function makeTreeSitterLax() {
    log_1.log.info('[Tree-Sitter] Lax parsing active');
    nonErrorChildren = nonErrorChildrenLax;
}
/**
 * Activates strict parsing: nodes containing errors yield no children.
 */
function makeTreeSitterStrict() {
    log_1.log.info('[Tree-Sitter] Strict parsing active');
    nonErrorChildren = nonErrorChildrenStrict;
}
/**
 * Recursively converts a single tree-sitter syntax node into the corresponding normalized AST node.
 *
 * @param node - the tree-sitter syntax node to convert
 * @returns the normalized AST node
 * @throws ParseError if the node type is not handled
 */
function convertTreeNode(node) {
    // generally, the grammar source file dictates what children a node has in what order:
    // https://github.com/r-lib/tree-sitter-r/blob/main/grammar.js
    const range = makeSourceRange(node);
    // every node receives its own info object: `linkCommentsToNextNodes` mutates
    // `info.additionalTokens` in place, so sharing one object between several nodes
    // would make a comment linked to one of them show up on all of them
    const makeDefaultInfo = () => ({
        fullRange: range,
        additionalTokens: [],
        fullLexeme: node.text
    });
    switch (node.type) {
        case tree_sitter_types_1.TreeSitterType.Program: {
            const [comments, children] = splitComments(nonErrorChildren(node));
            const body = children.map(n => [n, convertTreeNode(n)]);
            const remainingComments = linkCommentsToNextNodes(body, comments);
            return {
                type: type_1.RType.ExpressionList,
                children: body.map(n => n[1]),
                grouping: undefined,
                lexeme: undefined,
                info: {
                    additionalTokens: remainingComments.map(c => c[1])
                }
            };
        }
        case tree_sitter_types_1.TreeSitterType.BracedExpression:
        case tree_sitter_types_1.TreeSitterType.ParenthesizedExpression: {
            const [comments, children] = splitComments(nonErrorChildren(node));
            // first and last non-comment children are the grouping tokens, everything in between is the body
            const opening = children[0];
            const body = children.slice(1, -1).map(n => [n, convertTreeNode(n)]);
            const remainingComments = linkCommentsToNextNodes(body, comments);
            const closing = children[children.length - 1];
            return {
                type: type_1.RType.ExpressionList,
                location: undefined,
                lexeme: undefined,
                children: body.map(n => n[1]),
                grouping: [
                    {
                        type: type_1.RType.Symbol,
                        location: makeSourceRange(opening),
                        content: (0, retriever_1.removeRQuotes)(opening.text),
                        lexeme: opening.text,
                        namespace: undefined,
                        info: makeDefaultInfo()
                    },
                    {
                        type: type_1.RType.Symbol,
                        location: makeSourceRange(closing),
                        content: (0, retriever_1.removeRQuotes)(closing.text),
                        lexeme: closing.text,
                        namespace: undefined,
                        info: makeDefaultInfo()
                    }
                ],
                info: {
                    additionalTokens: remainingComments.map(c => c[1])
                }
            };
        }
        case tree_sitter_types_1.TreeSitterType.BinaryOperator: {
            const children = nonErrorChildren(node);
            const lhs = convertTreeNode(children[0]);
            const rhs = convertTreeNode(children[children.length - 1]);
            // between both operands there is the operator, potentially surrounded by comments
            const [commentsBoth, [op]] = splitComments(children.slice(1, -1));
            const comments = commentsBoth.map(c => c[1]);
            const opSource = makeSourceRange(op);
            const lhsAsArg = {
                type: type_1.RType.Argument,
                location: lhs.location,
                value: lhs,
                name: undefined,
                lexeme: lhs.lexeme,
                info: {}
            };
            if (op.type === 'special') {
                // operators of type 'special' are represented as calls of the operator with both operands as arguments
                return {
                    type: type_1.RType.FunctionCall,
                    location: opSource,
                    lexeme: node.text,
                    functionName: {
                        type: type_1.RType.Symbol,
                        location: opSource,
                        lexeme: op.text,
                        content: op.text,
                        namespace: undefined,
                        info: {}
                    },
                    arguments: [lhsAsArg, {
                            type: type_1.RType.Argument,
                            location: rhs.location,
                            value: rhs,
                            name: undefined,
                            lexeme: rhs.lexeme,
                            info: {}
                        }],
                    named: true,
                    infixSpecial: true,
                    info: {
                        additionalTokens: comments
                    }
                };
            }
            else if (op.text === '|>') {
                return {
                    type: type_1.RType.Pipe,
                    location: opSource,
                    lhs: lhsAsArg,
                    rhs,
                    lexeme: op.text,
                    info: {
                        fullRange: range,
                        additionalTokens: comments,
                        fullLexeme: node.text
                    }
                };
            }
            else {
                return {
                    type: type_1.RType.BinaryOp,
                    location: opSource,
                    lhs,
                    rhs,
                    operator: op.text,
                    lexeme: op.text,
                    info: {
                        fullRange: range,
                        additionalTokens: comments,
                        fullLexeme: node.text
                    }
                };
            }
        }
        case tree_sitter_types_1.TreeSitterType.UnaryOperator: {
            const [op, operand] = nonErrorChildren(node);
            return {
                type: type_1.RType.UnaryOp,
                operand: convertTreeNode(operand),
                location: makeSourceRange(op),
                operator: op.text,
                lexeme: op.text,
                info: makeDefaultInfo()
            };
        }
        case tree_sitter_types_1.TreeSitterType.NamespaceOperator: {
            const [lhs, /* :: or ::: */ , rhs] = nonErrorChildren(node);
            return {
                type: type_1.RType.Symbol,
                location: makeSourceRange(rhs),
                content: rhs.text,
                lexeme: rhs.text,
                namespace: lhs.text,
                info: makeDefaultInfo()
            };
        }
        case '(':
        case ')':
        case tree_sitter_types_1.TreeSitterType.Na:
        case tree_sitter_types_1.TreeSitterType.Null:
        case tree_sitter_types_1.TreeSitterType.Dots:
        case tree_sitter_types_1.TreeSitterType.DotDotI:
        case tree_sitter_types_1.TreeSitterType.Identifier:
        case tree_sitter_types_1.TreeSitterType.Return:
            return {
                type: type_1.RType.Symbol,
                location: range,
                // backtick-escaped names lose their backticks, everything else goes through removeRQuotes
                content: (0, strings_1.startAndEndsWith)(node.text, '`')
                    ? node.text.slice(1, -1)
                    : (0, retriever_1.removeRQuotes)(node.text),
                lexeme: node.text,
                namespace: undefined,
                info: makeDefaultInfo()
            };
        case tree_sitter_types_1.TreeSitterType.IfStatement: {
            const [ifNode, /* ( */ , condition, /* ) */ , then, /* else */ , ...otherwise] = nonErrorChildren(node);
            const filteredOtherwise = otherwise.filter(n => n.type !== tree_sitter_types_1.TreeSitterType.ElseStatement);
            return {
                type: type_1.RType.IfThenElse,
                condition: convertTreeNode(condition),
                then: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(then)),
                otherwise: filteredOtherwise.length > 0
                    ? (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(filteredOtherwise[0]))
                    : undefined,
                location: makeSourceRange(ifNode),
                lexeme: ifNode.text,
                info: makeDefaultInfo()
            };
        }
        case tree_sitter_types_1.TreeSitterType.ForStatement: {
            const children = nonErrorChildren(node);
            const forNode = children[0]; // followed by the "(" at index 1
            // everything from index 2 up to the "in" belongs to the loop variable (may include comments)
            const variable = getNodesUntil(children, 'in', 2);
            // skip the "in"; everything up to the ")" belongs to the sequence (may include comments)
            const sequence = getNodesUntil(children, ')', 2 + variable.length + 1);
            // skip the ")"; the next child is the loop body
            const body = children[2 + variable.length + 1 + sequence.length + 1];
            const [variableComments, [variableNode]] = splitComments(variable);
            const [sequenceComments, [sequenceNode]] = splitComments(sequence);
            return {
                type: type_1.RType.ForLoop,
                variable: {
                    type: type_1.RType.Symbol,
                    location: makeSourceRange(variableNode),
                    content: (0, retriever_1.removeRQuotes)(variableNode.text),
                    lexeme: variableNode.text,
                    namespace: undefined,
                    info: {
                        fullRange: undefined,
                        additionalTokens: [],
                        fullLexeme: undefined
                    }
                },
                vector: convertTreeNode(sequenceNode),
                body: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(body)),
                location: makeSourceRange(forNode),
                lexeme: forNode.text,
                info: {
                    fullRange: range,
                    additionalTokens: [...variableComments, ...sequenceComments].map(c => c[1]),
                    fullLexeme: node.text
                }
            };
        }
        case tree_sitter_types_1.TreeSitterType.WhileStatement: {
            const [whileNode, /* ( */ , condition, /* ) */ , body] = nonErrorChildren(node);
            return {
                type: type_1.RType.WhileLoop,
                condition: convertTreeNode(condition),
                body: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(body)),
                location: makeSourceRange(whileNode),
                lexeme: whileNode.text,
                info: makeDefaultInfo()
            };
        }
        case tree_sitter_types_1.TreeSitterType.RepeatStatement: {
            const [repeatNode, body] = nonErrorChildren(node);
            return {
                type: type_1.RType.RepeatLoop,
                body: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(body)),
                location: makeSourceRange(repeatNode),
                lexeme: repeatNode.text,
                info: makeDefaultInfo()
            };
        }
        case tree_sitter_types_1.TreeSitterType.Call: {
            const [func, argsParentheses] = nonErrorChildren(node);
            // tree-sitter wraps next and break in a function call, but we don't, so unwrap
            if (func.type === tree_sitter_types_1.TreeSitterType.Next || func.type === tree_sitter_types_1.TreeSitterType.Break) {
                return {
                    ...convertTreeNode(func),
                    info: makeDefaultInfo()
                };
            }
            // drop the surrounding parentheses and group the remaining children by the separating commas
            const args = (0, arrays_1.splitArrayOn)(nonErrorChildren(argsParentheses).slice(1, -1), x => x.type === 'comma');
            const funcRange = makeSourceRange(func);
            const call = {
                arguments: args.map(n => n.length === 0 ? r_function_call_1.EmptyArgument : convertTreeNode(n[0])),
                location: funcRange,
                lexeme: func.text,
                info: makeDefaultInfo()
            };
            if (func.type === tree_sitter_types_1.TreeSitterType.Identifier
                || func.type === tree_sitter_types_1.TreeSitterType.String
                || func.type === tree_sitter_types_1.TreeSitterType.NamespaceOperator
                || func.type === tree_sitter_types_1.TreeSitterType.Return) {
                let funcNode = convertTreeNode(func);
                // a call target given as string (e.g., "f"()) is treated as the symbol it names
                if (funcNode.type === type_1.RType.String) {
                    funcNode = {
                        ...funcNode,
                        type: type_1.RType.Symbol,
                        namespace: undefined,
                        content: (0, retriever_1.removeRQuotes)(func.text)
                    };
                }
                return {
                    ...call,
                    type: type_1.RType.FunctionCall,
                    functionName: {
                        ...funcNode,
                        info: {
                            fullRange: range,
                            additionalTokens: [],
                            fullLexeme: node.text
                        }
                    },
                    named: true
                };
            }
            else {
                // any other call target is an arbitrary expression that evaluates to the function
                return {
                    ...call,
                    type: type_1.RType.FunctionCall,
                    calledFunction: convertTreeNode(func),
                    named: undefined
                };
            }
        }
        case tree_sitter_types_1.TreeSitterType.FunctionDefinition: {
            const [name, paramsParens, body] = nonErrorChildren(node);
            // use the active error strategy for the parameter list as well (consistent with calls and subsets),
            // and skip groups that hold no node at all so we never try to convert `undefined`
            const params = (0, arrays_1.splitArrayOn)(nonErrorChildren(paramsParens).slice(1, -1), x => x.type === 'comma')
                .filter(group => group.length > 0);
            return {
                type: type_1.RType.FunctionDefinition,
                parameters: params.map(n => convertTreeNode(n[0])),
                body: (0, normalize_meta_1.ensureExpressionList)(convertTreeNode(body)),
                location: makeSourceRange(name),
                lexeme: name.text,
                info: makeDefaultInfo()
            };
        }
        case tree_sitter_types_1.TreeSitterType.String:
            return {
                type: type_1.RType.String,
                location: range,
                content: (0, convert_values_1.string2ts)(node.text),
                lexeme: node.text,
                info: makeDefaultInfo()
            };
        case tree_sitter_types_1.TreeSitterType.Float:
        case tree_sitter_types_1.TreeSitterType.Integer:
        case tree_sitter_types_1.TreeSitterType.Complex:
        case tree_sitter_types_1.TreeSitterType.Inf:
        case tree_sitter_types_1.TreeSitterType.Nan:
            return {
                type: type_1.RType.Number,
                location: range,
                content: (0, convert_values_1.number2ts)(node.text),
                lexeme: node.text,
                info: makeDefaultInfo()
            };
        case tree_sitter_types_1.TreeSitterType.True:
        case tree_sitter_types_1.TreeSitterType.False:
            return {
                type: type_1.RType.Logical,
                location: range,
                content: (0, convert_values_1.boolean2ts)(node.text),
                lexeme: node.text,
                info: makeDefaultInfo()
            };
        case tree_sitter_types_1.TreeSitterType.Break:
        case tree_sitter_types_1.TreeSitterType.Next:
            return {
                type: node.type === tree_sitter_types_1.TreeSitterType.Break ? type_1.RType.Break : type_1.RType.Next,
                location: range,
                lexeme: node.text,
                info: makeDefaultInfo()
            };
        case tree_sitter_types_1.TreeSitterType.Subset:
        case tree_sitter_types_1.TreeSitterType.Subset2: {
            // subset has children like a and [x]
            const [func, content] = nonErrorChildren(node);
            // bracket is now [ or [[ and argsClosing is x] or x]]
            const [bracket, ...argsClosing] = nonErrorChildren(content);
            const args = (0, arrays_1.splitArrayOn)(argsClosing.slice(0, -1), x => x.type === 'comma');
            return {
                type: type_1.RType.Access,
                operator: bracket.text,
                accessed: convertTreeNode(func),
                access: args.map(n => n.length === 0 ? r_function_call_1.EmptyArgument : convertTreeNode(n[0])),
                location: makeSourceRange(bracket),
                lexeme: bracket.text,
                info: makeDefaultInfo()
            };
        }
        case tree_sitter_types_1.TreeSitterType.ExtractOperator: {
            const [lhs, operator, rhs] = nonErrorChildren(node);
            const rhsRange = makeSourceRange(rhs);
            return {
                type: type_1.RType.Access,
                operator: operator.text,
                accessed: convertTreeNode(lhs),
                access: [{
                        type: type_1.RType.Argument,
                        name: undefined,
                        value: {
                            ...convertTreeNode(rhs),
                            // the accessed name carries the info of the whole extract expression
                            info: makeDefaultInfo()
                        },
                        location: rhsRange,
                        lexeme: rhs.text,
                        info: {
                            fullRange: rhsRange,
                            additionalTokens: [],
                            fullLexeme: rhs.text
                        }
                    }],
                location: makeSourceRange(operator),
                lexeme: operator.text,
                info: makeDefaultInfo()
            };
        }
        case tree_sitter_types_1.TreeSitterType.Parameter: {
            const children = nonErrorChildren(node);
            const name = children[0];
            const nameRange = makeSourceRange(name);
            let defaultValue = undefined;
            // with exactly three children, the third one is converted as the default value
            if (children.length === 3) {
                defaultValue = convertTreeNode(children[2]);
            }
            return {
                type: type_1.RType.Parameter,
                name: {
                    type: type_1.RType.Symbol,
                    location: nameRange,
                    namespace: undefined,
                    content: name.text,
                    lexeme: name.text,
                    info: {
                        fullRange: range,
                        additionalTokens: [],
                        fullLexeme: name.text
                    }
                },
                special: name.text === '...',
                defaultValue,
                location: nameRange,
                lexeme: name.text,
                info: {
                    fullRange: range,
                    additionalTokens: [],
                    fullLexeme: name.text
                }
            };
        }
        case tree_sitter_types_1.TreeSitterType.Argument: {
            const children = nonErrorChildren(node);
            if (children.length === 1) {
                // unnamed argument: the only child is the value
                const [arg] = children;
                return {
                    type: type_1.RType.Argument,
                    name: undefined,
                    value: convertTreeNode(arg),
                    location: range,
                    lexeme: node.text,
                    info: makeDefaultInfo()
                };
            }
            else {
                const [nameNode, /* = */ , valueNode] = children;
                let name = convertTreeNode(nameNode);
                // unescape argument names
                if (name.type === type_1.RType.String) {
                    name = {
                        ...name,
                        type: type_1.RType.Symbol,
                        content: name.content.str,
                        namespace: undefined
                    };
                }
                else if ((0, strings_1.startAndEndsWith)(name.content, '`')) {
                    name.content = name.content.slice(1, -1);
                }
                const nameRange = makeSourceRange(nameNode);
                return {
                    type: type_1.RType.Argument,
                    name: name,
                    // the value may be missing (e.g., `f(a=)`)
                    value: valueNode ? convertTreeNode(valueNode) : undefined,
                    location: nameRange,
                    lexeme: nameNode.text,
                    info: {
                        fullRange: nameRange,
                        additionalTokens: [],
                        fullLexeme: nameNode.text
                    }
                };
            }
        }
        case tree_sitter_types_1.TreeSitterType.Comment:
            return {
                type: type_1.RType.Comment,
                location: range,
                // drop the leading comment character
                content: node.text.slice(1),
                lexeme: node.text,
                info: makeDefaultInfo()
            };
        default:
            throw new normalizer_data_1.ParseError(`unexpected node type ${node.type} at ${JSON.stringify(range)}`);
    }
}
/**
 * Converts the start and end position of a tree-sitter node into a source range of the form
 * `[startLine, startColumn, endLine, endColumn]`.
 *
 * @param node - the tree-sitter node to retrieve the range for
 */
function makeSourceRange(node) {
    return [
        // tree-sitter is 0-based but we want 1-based
        node.startPosition.row + 1, node.startPosition.column + 1,
        // tree-sitter's end position is one off from ours, so we don't add 1 here
        node.endPosition.row + 1, node.endPosition.column
    ];
}
/**
 * Partitions the given syntax nodes into comments and everything else.
 *
 * @param nodes - the syntax nodes to split
 * @returns a pair `[comments, others]`: `comments` holds `[syntaxNode, commentAstNode]` tuples for every
 *          comment node, `others` holds the remaining syntax nodes in their original order
 */
function splitComments(nodes) {
    const comments = [];
    const others = [];
    for (const node of nodes) {
        if (node.type === tree_sitter_types_1.TreeSitterType.Comment) {
            comments.push([node, {
                    type: type_1.RType.Comment,
                    location: makeSourceRange(node),
                    // drop the leading comment character
                    content: node.text.slice(1),
                    lexeme: node.text,
                    info: {
                        additionalTokens: [],
                        fullLexeme: node.text
                    }
                }]);
        }
        else {
            others.push(node);
        }
    }
    return [comments, others];
}
/**
 * Attaches every comment to the `info.additionalTokens` of one of the given converted nodes (mutating them).
 *
 * @param nodes    - `[syntaxNode, astNode]` tuples that comments may be attached to
 * @param comments - `[syntaxNode, commentAstNode]` tuples as produced by {@link splitComments}
 * @returns the comments that could not be attached to any node (only possible if `nodes` is empty)
 */
function linkCommentsToNextNodes(nodes, comments) {
    const remain = [];
    for (const [commentSyntaxNode, commentNode] of comments) {
        let sibling;
        if (commentSyntaxNode.previousSibling?.endIndex === commentSyntaxNode.startIndex) {
            // the comment starts exactly where its previous sibling ends, so we link it to that sibling
            sibling = commentSyntaxNode.previousSibling;
        }
        else {
            // otherwise, we link it to the next sibling that is not a comment itself
            sibling = commentSyntaxNode.nextSibling;
            while (sibling && sibling.type === tree_sitter_types_1.TreeSitterType.Comment) {
                sibling = sibling.nextSibling;
            }
        }
        // if there is no valid sibling, we just link the comment to the first node (see normalize-expressions.ts)
        const [, node] = (sibling ? nodes.find(([s]) => s.equals(sibling)) : undefined) ?? nodes[0] ?? [];
        if (node) {
            node.info.additionalTokens ??= [];
            node.info.additionalTokens.push(commentNode);
        }
        else {
            remain.push([commentSyntaxNode, commentNode]);
        }
    }
    return remain;
}
/**
 * Collects the nodes from `startIndex` onwards until (and excluding) the first node of the given type.
 *
 * @param nodes      - the nodes to search
 * @param type       - the node type to stop at
 * @param startIndex - the index to start collecting from (defaults to 0)
 * @returns the collected nodes; all nodes from `startIndex` on if no node of the given type is found
 */
function getNodesUntil(nodes, type, startIndex = 0) {
    const ret = [];
    for (let i = startIndex; i < nodes.length; i++) {
        if (nodes[i].type === type) {
            break;
        }
        ret.push(nodes[i]);
    }
    return ret;
}
//# sourceMappingURL=tree-sitter-normalize.js.map