@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
172 lines (131 loc) • 8.7 kB
JavaScript
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.WikiNormalizedAst = void 0;
const doc_code_1 = require("./doc-util/doc-code");
const doc_normalized_ast_1 = require("./doc-util/doc-normalized-ast");
const doc_files_1 = require("./doc-util/doc-files");
const doc_cli_option_1 = require("./doc-util/doc-cli-option");
const doc_structure_1 = require("./doc-util/doc-structure");
const flowr_analyzer_1 = require("../project/flowr-analyzer");
const flowr_analyzer_builder_1 = require("../project/flowr-analyzer-builder");
const flowr_file_1 = require("../project/context/flowr-file");
const doc_maker_1 = require("./wiki-mk/doc-maker");
const roxygen_parse_1 = require("../r-bridge/roxygen2/roxygen-parse");
const r_binary_op_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-binary-op");
const model_1 = require("../r-bridge/lang-4.x/ast/model/model");
const r_project_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-project");
const r_expression_list_1 = require("../r-bridge/lang-4.x/ast/model/nodes/r-expression-list");
const stateful_fold_1 = require("../r-bridge/lang-4.x/ast/model/processing/stateful-fold");
/**
 * Builds an analyzer via a default-configured `FlowrAnalyzerBuilder`, registers `code`
 * as a request, runs `normalize()`, and returns the `ast` property of the result.
 *
 * NOTE(review): this function is also handed to `ctx.code(...)` in `WikiNormalizedAst.text`
 * with `dropLinesStart: 1` and `dropLinesEnd: 2`; presumably the rendered wiki snippet depends
 * on the line layout of this function — confirm before reformatting or adding lines inside it.
 *
 * @param code - the value passed on to `analyzer.addRequest` (presumably R code or a request object — confirm against the analyzer API)
 * @returns the `ast` property of the value that `analyzer.normalize()` resolves to
 */
async function simpleNormalizedAst(code) {
const analyzer = await new flowr_analyzer_builder_1.FlowrAnalyzerBuilder().build();
analyzer.addRequest(code);
const result = await analyzer.normalize();
return result.ast;
}
/**
 * Builds an analyzer with the engine set to `'tree-sitter'`, adds three inline text files
 * (`foo.R`, `a.R`, `b.R`), requests them in the order `a.R`, `b.R`, `foo.R`, and returns
 * the value that `analyzer.normalize()` resolves to.
 *
 * NOTE(review): this function is also handed to `ctx.code(...)` in `WikiNormalizedAst.text`
 * with `dropLinesStart: 1` and `dropLinesEnd: 2`; presumably the rendered wiki snippet depends
 * on the line layout of this function — confirm before reformatting or adding lines inside it.
 *
 * @returns the complete normalization result (callers in this file read its `ast` property)
 */
async function quickNormalizedAstMultipleFiles() {
const analyzer = await new flowr_analyzer_builder_1.FlowrAnalyzerBuilder()
.setEngine('tree-sitter')
.build();
analyzer.addFile(new flowr_file_1.FlowrInlineTextFile('foo.R', 'x <- 12; source("a.R")'));
analyzer.addFile(new flowr_file_1.FlowrInlineTextFile('a.R', 'y <- x + 3'));
analyzer.addFile(new flowr_file_1.FlowrInlineTextFile('b.R', 'print(x, y)'));
analyzer.addRequest({ request: 'file', content: 'a.R' }, { request: 'file', content: 'b.R' }, { request: 'file', content: 'foo.R' });
const n = await analyzer.normalize();
return n;
}
/**
 * Document maker for the "Normalized AST" wiki page.
 *
 * The rendered page is published at
 * https://github.com/flowr-analysis/flowr/wiki/Normalized-AST
 */
class WikiNormalizedAst extends doc_maker_1.DocMaker {
    /**
     * Registers this page with the `DocMaker` base class.
     * NOTE(review): the three arguments are presumably the output path, the generating
     * source file, and a short purpose label — confirm against `DocMaker`.
     */
    constructor() {
        super('wiki/Normalized AST.md', module.filename, 'normalized ast');
    }
    /**
     * Produces the markdown body of the wiki page as a single string.
     *
     * Everything between the backticks is emitted verbatim, so the template content
     * deliberately carries no indentation or in-template comments.
     *
     * @param ctx        - documentation context; used here for `link`, `linkO`, `code`, `mermaid`, and `hierarchy`
     * @param treeSitter - passed through to `printNormalizedAstForCode` to render the example AST
     * @returns a promise resolving to the page's markdown text
     */
    async text({ ctx, treeSitter }) {
        return `
_flowR_ produces a normalized version of R's abstract syntax tree (AST),
offering the following benefits:
1. abstract away from intricacies of the R parser
2. provide a version-independent representation of the program
3. decorate the AST with additional information, e.g., parent relations and nesting information
In general, the mapping should be rather intuitive and focused primarily on the
syntactic structure of the program. As with other types in flowR, you get corresponding helper objects.
So if you want to work with an \`RBinaryOp\` node, you can use the ${ctx.link(r_binary_op_1.RBinaryOp)} object to check whether a node is an \`RBinaryOp\` and to access its fields
(e.g., with ${ctx.linkO(r_binary_op_1.RBinaryOp, 'is')}).
Consider the following example which shows the normalized AST of the code
${(0, doc_code_1.codeBlock)('r', 'x <- 2 * 3 + 1')}
Each node in the AST contains the type, the id, and the lexeme (if applicable).
Each edge is labeled with the type of the parent-child relationship (the "role").
${await (0, doc_normalized_ast_1.printNormalizedAstForCode)(treeSitter, 'x <- 2 * 3 + 1', { showCode: false, prefix: 'flowchart LR\n' })}
> [!TIP]
> If you want to investigate the normalized AST,
> you can either use the [Visual Studio Code extension](${doc_files_1.FlowrGithubBaseRef}/vscode-flowr) or the ${(0, doc_cli_option_1.getReplCommand)('normalize*')}
> command in the REPL (see the [Interface wiki page](${doc_files_1.FlowrWikiBaseRef}/Interface) for more information).
Indicative of the normalization is the root ${ctx.link(r_project_1.RProject)} node, which is present in every normalized AST
and provides the ${ctx.link(r_expression_list_1.RExpressionList)} nodes for each file in the project.
In general, we provide node types for:
1. literals (e.g., numbers and strings)
2. references (e.g., symbols, parameters and function calls)
3. constructs (e.g., loops and function definitions)
4. branches (e.g., \`next\` and \`break\`)
5. operators (e.g. \`+\`, \`-\`, and \`*\`)
<details>
<summary style="color:gray">Complete Class Diagram</summary>
Every node is a link, which directly refers to the implementation in the source code.
Grayed-out parts are used for structuring the AST, grouping together related nodes.
${(0, doc_code_1.codeBlock)('mermaid', ctx.mermaid(model_1.RNode))}
</details>
Node types are controlled by the ${ctx.link('RType')} enum (see ${(0, doc_files_1.getFilePathMd)('../r-bridge/lang-4.x/ast/model/type.ts')}),
which is used to distinguish between different types of nodes.
Additionally, every AST node is generic with respect to the \`Info\` type which allows for arbitrary decorations (e.g., parent information or dataflow constraints).
Most notably, the \`info\` field holds the \`id\` of the node, which is used to reference the node in the [dataflow graph](${doc_files_1.FlowrWikiBaseRef}/Dataflow%20Graph).
In summary, we have the following types:
${(0, doc_structure_1.details)('Normalized AST Node Types', ctx.hierarchy(model_1.RNode, { collapseFromNesting: Number.MAX_VALUE, ignoredTypes: ['Info', 'LogLevel'] }))}
The following segments intend to give you an overview of how to work with the normalized AST:
* [How to get a Normalized AST](#how-to-get-a-normalized-ast)
* [Traversing the Normalized AST](#traversing-the-normalized-ast)
> [!TIP]
> If you want to get more information on roxygen comments attached to AST nodes,
> check out ${ctx.link(roxygen_parse_1.parseRoxygenCommentsOfNode)}.
## How to Get a Normalized AST
As explained alongside the [Interface](${doc_files_1.FlowrWikiBaseRef}/Interface#creating-flowr-analyses) wiki page, you can use an instance of
${ctx.link(flowr_analyzer_1.FlowrAnalyzer)} to get the ${ctx.link('NormalizedAst')}:
${ctx.code(simpleNormalizedAst, { dropLinesStart: 1, dropLinesEnd: 2, hideDefinedAt: true })}
From the REPL, you can use the ${(0, doc_cli_option_1.getReplCommand)('normalize')} command.
### Multi-File Projects
With the ${ctx.link(flowr_analyzer_1.FlowrAnalyzer)}, you can analyze multiple files at once:
${ctx.code(quickNormalizedAstMultipleFiles, { dropLinesStart: 1, dropLinesEnd: 2, hideDefinedAt: true })}
Visualizing the resulting AST yields the following.
<details>
<summary style="color:gray">Mermaid Diagram</summary>
${(0, doc_normalized_ast_1.printNormalizedAst)((await quickNormalizedAstMultipleFiles()).ast, 'flowchart LR\n')}
</details>
## Traversing the Normalized AST
We provide a visitor to traverse the normalized AST.
Please note, that it usually operates on the ${ctx.link('RExpressionList')} level, and it is up to
you to decide how you want to traverse multiple files with a ${ctx.link('RProject')} in the AST (you can, for example, simply flat-map over the files).
The ${ctx.link('RProject')} node cannot appear nested within other nodes, so you can safely assume that any child of a node is not an ${ctx.link('RProject')}.
${(0, doc_structure_1.block)({
type: 'TIP',
content: 'When working with names and identifiers, consider using the utilities provided with the ' + ctx.link('Identifier', undefined, { type: 'variable' }) + ' object.',
})}
### Visitors
If you want a simple visitor which traverses the AST, the ${ctx.linkO(model_1.RNode, 'visitAst')} function is a good starting point.
You may specify functions to be called whenever you enter and exit a node during the traversal, and any
computation is to be done by side effects.
For example, if you want to collect all the \`id\`s present within a normalized (sub-)AST,
as it is done by the ${ctx.linkO(model_1.RNode, 'collectAllIds')} (and corresponding ${ctx.linkO(model_1.RNode, 'collectAllIdsWithStop')}, ${ctx.linkO(r_project_1.RProject, 'collectAllIds')}, ...) function, you can use the following visitor:
${(0, doc_code_1.codeBlock)('ts', `
const ids = new Set<NodeId>();
visitAst(nodes, node => {
ids.add(node.info.id);
});
return ids;
`)}
### Stateful Fold
A stateful fold over the normalized AST can be performed with the ${ctx.link(stateful_fold_1.foldAstStateful)} function.
It allows you to specify a down function which is called during the down-pass and can pass information to child nodes, and fold functions which are called after the down-pass in conventional fold-fashion.
`;
    }
}
exports.WikiNormalizedAst = WikiNormalizedAst;
//# sourceMappingURL=wiki-normalized-ast.js.map