UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

351 lines (269 loc) 18.7 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.WikiInterface = void 0; const shell_1 = require("../r-bridge/shell"); const doc_files_1 = require("./doc-util/doc-files"); const doc_cli_option_1 = require("./doc-util/doc-cli-option"); const doc_server_message_1 = require("./doc-util/doc-server-message"); const doc_data_server_messages_1 = require("./data/server/doc-data-server-messages"); const doc_code_1 = require("./doc-util/doc-code"); const retriever_1 = require("../r-bridge/retriever"); const doc_docker_1 = require("./doc-util/doc-docker"); const doc_repl_1 = require("./doc-util/doc-repl"); const doc_dfg_1 = require("./doc-util/doc-dfg"); const config_1 = require("../config"); const schema_1 = require("../util/schema"); const ansi_1 = require("../util/text/ansi"); const flowr_main_options_1 = require("../cli/flowr-main-options"); const doc_issue_1 = require("./doc-util/doc-issue"); const doc_structure_1 = require("./doc-util/doc-structure"); const doc_maker_1 = require("./wiki-mk/doc-maker"); const doc_writing_code_1 = require("./data/interface/doc-writing-code"); const built_in_proc_name_1 = require("../dataflow/environments/built-in-proc-name"); async function explainServer(parser) { (0, doc_data_server_messages_1.documentAllServerMessages)(); return ` As explained in the [Overview](${doc_files_1.FlowrWikiBaseRef}/Overview), you can simply run the [TCP](https://de.wikipedia.org/wiki/Transmission_Control_Protocol)&nbsp;server by adding the ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'server', true)} flag (and, due to the interactive mode, exit with the conventional <kbd>CTRL</kbd>+<kbd>C</kbd>). Currently, every connection is handled by the same underlying \`${shell_1.RShell.name}\` - so the server is not designed to handle many clients at a time. Additionally, the server is not well guarded against attacks (e.g., you can theoretically spawn an arbitrary number of&nbsp;${shell_1.RShell.name} sessions on the target machine). Every message has to be given in a single line (i.e., without a newline in-between) and end with a newline character. Nevertheless, we will pretty-print example given in the following segments for the ease of reading. ${(0, doc_structure_1.block)({ type: 'NOTE', content: ` The default ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'server', false)} uses a simple [TCP](https://de.wikipedia.org/wiki/Transmission_Control_Protocol) connection. If you want _flowR_ to expose a [WebSocket](https://de.wikipedia.org/wiki/WebSocket) server instead, add the ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'ws', false)} flag (i.e., ${(0, doc_cli_option_1.multipleCliOptions)('flowr', 'server', 'ws')}) when starting _flowR_ from the command line. ` })} ${await (0, doc_server_message_1.printServerMessages)(parser)} ### 📡 Ways of Connecting If you are interested in clients that communicate with _flowR_, please check out the [R adapter](${doc_files_1.FlowrGithubBaseRef}/flowr-r-adapter) as well as the [Visual Studio Code extension](${doc_files_1.FlowrGithubBaseRef}/vscode-flowr). <ol> <li> <a id="using-netcat-without-websocket"></a>Using Netcat <details> <summary>Without Websocket</summary> Suppose, you want to launch the server using a docker container. Then, start the server by (forwarding the internal default port): ${(0, doc_code_1.codeBlock)('shell', `docker run -p1042:1042 -it --rm ${doc_docker_1.DockerName} --server`)} Now, using a tool like [_netcat_](https://linux.die.net/man/1/nc) to connect: ${(0, doc_code_1.codeBlock)('shell', 'nc 127.0.0.1 1042')} Within the started session, type the following message (as a single line) and press enter to see the response: ${(0, doc_code_1.codeBlock)('json', (0, retriever_1.removeRQuotes)(JSON.stringify({ type: 'request-file-analysis', content: 'x <- 1', id: '1' })))} </details> </li> <li> Using Python <details> <summary>Without Websocket</summary> In Python, a similar process would look like this. After starting the server as with using [netcat](#using-netcat-without-websocket), you can use the following script to connect: ${(0, doc_code_1.codeBlock)('python', ` import socket with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.connect(('127.0.0.1', 1042)) print(s.recv(4096)) # for the hello message s.send(b'${(0, retriever_1.removeRQuotes)(JSON.stringify({ type: 'request-file-analysis', content: 'x <- 1', id: '1' }))}\\n') print(s.recv(65536)) # for the response (please use a more sophisticated mechanism) `)} </details> </li> </ol> `; } async function explainRepl(parser, ctx) { return ` > [!NOTE] > To execute arbitrary R commands with a repl request, _flowR_ has to be started explicitly with ${ctx.cliOption('flowr', 'r-session-access')}. > Please be aware that this introduces a security risk and note that this relies on the ${ctx.linkPage('wiki/Engines', '`r-shell` engine')} . Although primarily meant for users to explore, there is nothing which forbids simply calling _flowR_ as a subprocess to use standard-in, -output, and -error for communication (although you can access the REPL using the server as well, with the [REPL Request](#message-request-repl-execution) message). The read-eval-print loop&nbsp;(REPL) works relatively simple. You can submit an expression (using <kbd>Enter</kbd>), which is interpreted as an R&nbsp;expression by default but interpreted as a *command* if it starts with a colon (\`:\`). The best command to get started with the REPL is ${ctx.replCmd('help')}. Besides, you can leave the REPL either with the command ${ctx.replCmd('quit')} or by pressing <kbd>Ctrl</kbd>+<kbd>C</kbd> twice. When writing a *command*, you may press <kbd>Tab</kbd> to get a list of completions, if available. Multiple commands can be entered in a single line by separating them with a semicolon (\`;\`), e.g. \`:parse "x<-2"; :df*\`. If a command is given without R code, the REPL will re-use R code given in a previous command. The prior example will hence return first the parsed AST of the program and then the dataflow graph for \`"x <- 2"\`. > [!NOTE] > If you develop flowR, you may want to launch the repl using the \`npm run main-dev\` command, this way, you get a non-minified version of flowR with debug information and hot-reloading of source files. <details> <summary>Available Commands</summary> We currently offer the following commands (this with a \`[*]\` suffix are available with and without the star): ${(0, doc_repl_1.printReplHelpAsMarkdownTable)()} </details> ${(0, doc_structure_1.block)({ type: 'TIP', content: ` As indicated by the examples before, all REPL commands that operate on code keep track of the state. Hence, if you run a command like ${(0, doc_cli_option_1.getReplCommand)('dataflow*')} without providing R code, the REPL will re-use the R code provided in a previous command. Likewise, doing this will benefit from incrementality! If you request the dataflow graph with \`:df* x <- 2 * y\` and then want to see the parsed AST with \`:parse\`, the REPL will re-use previously obtained information and not re-parse the code again. ` })} Generally, many commands offer shortcut versions in the REPL. Many queries, for example, offer a shortened format (see the example below). Of special note, the ${ctx.linkPage('wiki/Query API', 'Config Query', 'Config-Query')} can be used to also modify the currently active configuration of _flowR_ within the REPL (see the ${ctx.linkPage('wiki/Query API', 'wiki page', 'Config-Query')} for more information). ### Example: Retrieving the Dataflow Graph To retrieve a URL to the [mermaid](https://mermaid.js.org/) diagram of the dataflow of a given expression, use ${ctx.replCmd('dataflow*')} (or ${ctx.replCmd('dataflow')} to get the mermaid code in the cli): ${await (0, doc_repl_1.documentReplSession)(parser, [{ command: ':dataflow* y <- 1 + x', description: `Retrieve the dataflow graph of the expression \`y <- 1 + x\`. It looks like this:\n${await (0, doc_dfg_1.printDfGraphForCode)(parser, 'y <- 1 + x')}` }])} For small graphs like this, ${ctx.replCmd('dataflowascii')} also provides an ASCII representation directly in the REPL: ${await (0, doc_repl_1.documentReplSession)(parser, [{ command: ':df! y <- 1 + x', description: 'Retrieve the dataflow graph of the expression `y <- 1 + x` as ASCII art.' }], { openOutput: true })} For the slicing with ${ctx.replCmd('slicer')}, you have access to the same [magic comments](#slice-magic-comments) as with the [slice request](#message-request-slice). ### Example: Interfacing with the File System Many commands that allow for an R-expression (like ${ctx.replCmd('dataflow*')}) allow for a file as well if the argument starts with \`${retriever_1.fileProtocol}\`. If you are working from the root directory of the _flowR_ repository, the following gives you the parsed AST of the example file using the ${ctx.replCmd('parse')} command: ${await (0, doc_repl_1.documentReplSession)(parser, [{ command: `:parse ${retriever_1.fileProtocol}test/testfiles/example.R`, description: `Retrieve the parsed AST of the example file. <details> <summary>File Content</summary> ${(0, doc_code_1.codeBlock)('r', (0, doc_files_1.getFileContentFromRoot)('test/testfiles/example.R'))} </details> As _flowR_ directly transforms this AST the output focuses on being human-readable instead of being machine-readable. ` }])} ### Example: Run a Query You can run any query supported by _flowR_ using the ${ctx.replCmd('query')} command. For example, to obtain the shapes of all data frames in a given piece of code, you can run: ${await (0, doc_repl_1.documentReplSession)(parser, [{ command: ':query @df-shape "x <- data.frame(a = 1:10, b = 1:10)\\ny <- x$a"', description: 'Retrieve the shapes of all data frames in the given code.' }], { openOutput: true })} To run the linter on a file, you can use (in this example, we just issue the \`dead-code\` linter on a small piece of code): ${await (0, doc_repl_1.documentReplSession)(parser, [{ command: ':query @linter rules:dead-code "if(FALSE) x <- 2"', description: 'Run the linter on the given code, with only the `dead-code` rule enabled.' }], { openOutput: true })} For more information on the available queries, please check out the ${ctx.linkPage('wiki/Query API', 'Query API')}. `; } function explainConfigFile(ctx) { return ` When running _flowR_, you may want to specify some behaviors with a dedicated configuration file. By default, flowR looks for a file named \`${flowr_main_options_1.defaultConfigFile}\` in the current working directory (or any higher directory). You can also specify a different file with ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'config-file')} or pass the configuration inline using ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'config-json')}. To inspect the current configuration, you can run flowr with the ${(0, doc_cli_option_1.getCliLongOptionOf)('flowr', 'verbose')} flag, or use the \`config\` [Query](${doc_files_1.FlowrWikiBaseRef}/Query%20API). Within the REPL this works by running the following: ${(0, doc_code_1.codeBlock)('shell', ':query @config')} To work with the ${ctx.link(config_1.FlowrConfig)} you can use the provided helper objects alongside its methods like ${ctx.linkO(config_1.FlowrConfig, 'amend')}. The following summarizes the configuration options: - \`ignoreSourceCalls\`: If set to \`true\`, _flowR_ will ignore source calls when analyzing the code, i.e., ignoring the inclusion of other files. - \`semantics\`: allows to configure the way _flowR_ handles R, although we currently only support \`semantics/environment/overwriteBuiltIns\`. You may use this to overwrite _flowR_'s handling of built-in function and even completely clear the preset definitions shipped with flowR. See [Configure BuiltIn Semantics](#configure-builtin-semantics) for more information. - \`solver\`: allows to configure how _flowR_ resolves variables and their values (currently we support: ${Object.values(config_1.VariableResolve).map(v => `\`${v}\``).join(', ')}), as well as if pointer analysis should be active. - \`engines\`: allows to configure the engines used by _flowR_ to interact with R code. See the [Engines wiki page](${doc_files_1.FlowrWikiBaseRef}/Engines) for more information. - \`defaultEngine\`: allows to specify the default engine to use for interacting with R code. If not set, an arbitrary engine from the specified list will be used. - \`abstractInterpretation\`: allows to configure how _flowR_ performs abstract interpretation, although we currently only support data frame shape inference through abstract interpretation. So you can configure _flowR_ by adding a file like the following: <details> <summary>Example Configuration File</summary> ${(0, doc_code_1.codeBlock)('json', JSON.stringify({ ignoreSourceCalls: true, semantics: { environment: { overwriteBuiltIns: { definitions: [ { type: 'function', names: ['foo'], processor: built_in_proc_name_1.BuiltInProcName.Assignment, config: {} } ] } } }, repl: { quickStats: false, dfProcessorHeat: false }, project: { resolveUnknownPathsOnDisk: true }, engines: [{ type: 'r-shell' }], solver: { variables: config_1.VariableResolve.Alias, evalStrings: true, resolveSource: { dropPaths: config_1.DropPathsOption.No, ignoreCapitalization: true, inferWorkingDirectory: config_1.InferWorkingDirectory.ActiveScript, searchPath: [] }, instrument: {}, slicer: { threshold: 50 } }, abstractInterpretation: { wideningThreshold: 4, dataFrame: { maxColNames: 20, readLoadedData: { readExternalFiles: true, maxReadLines: 1_000_000 } } } }, null, 2))} </details> <details> <a id='configure-builtin-semantics'></a> <summary>Configure Built-In Semantics</summary> \`semantics/environment/overwriteBuiltins\` accepts two keys: - \`loadDefaults\` (boolean, initially \`true\`): If set to \`true\`, the default built-in definitions are loaded before applying the custom definitions. Setting this flag to \`false\` explicitly disables the loading of the default definitions. - \`definitions\` (array, initially empty): Allows to overwrite or define new built-in elements. Each object within must have a \`type\` which is one of the below. Furthermore, they may define a string array of \`names\` which specifies the identifiers to bind the definitions to. You may use \`assumePrimitive\` to specify whether _flowR_ should assume that this is a primitive non-library definition (so you probably just do not want to specify the key). | Type | Description | Example | | --------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------- | | \`constant\` | Additionally allows for a \`value\` this should resolve to. | \`{ type: 'constant', names: ['NULL', 'NA'], value: null }\` | | \`function\` | Is a rather flexible way to define and bind built-in functions. For the time, we do not have extensive documentation to cover all the cases, so please either consult the sources with the \`default-builtin-config.ts\` or open a [new issue](${doc_issue_1.NewIssueUrl}). | \`{ type: 'function', names: ['next'], processor: '${built_in_proc_name_1.BuiltInProcName.Default}', config: { cfg: ExitPointType.Next } }\` | | \`replacement\` | A comfortable way to specify replacement functions like \`$<-\` or \`names<-\`. \`suffixes\` describes the... suffixes to attach automatically. | \`{ type: 'replacement', suffixes: ['<-', '<<-'], names: ['[', '[['] }\` | </details> <details> <summary style='color:gray'>Full Configuration-File Schema</summary> ${(0, schema_1.describeSchema)(config_1.FlowrConfig.Schema, ansi_1.markdownFormatter)} </details> `; } /** * https://github.com/flowr-analysis/flowr/wiki/Interface */ class WikiInterface extends doc_maker_1.DocMaker { constructor() { super('wiki/Interface.md', module.filename, 'interface'); } async text({ shell, ctx, treeSitter }) { return ` Although far from being as detailed as the in-depth explanation of ${ctx.linkPage('wiki/Core', '_flowR_')}, this wiki page explains how to interface with _flowR_ in more detail. In general, command line arguments and other options provide short descriptions on hover over. * [💻 Using the REPL](#using-the-repl) * [⚙️ Configuring FlowR](#configuring-flowr) * [⚒️ Writing Code](#writing-code) * [💬 Communicating with the Server](#communicating-with-the-server) <a id='using-the-repl'></a> ## 💻 Using the REPL ${await explainRepl(treeSitter, ctx)} <a id='configuring-flowr'></a> ## ⚙️ Configuring FlowR ${explainConfigFile(ctx)} <a id='writing-code'></a> ## ⚒️ Writing Code ${(0, doc_writing_code_1.explainWritingCode)(shell, ctx)} <a id='communicating-with-the-server'></a> ## 💬 Communicating with the Server ${await explainServer(shell)} `; } } exports.WikiInterface = WikiInterface; //# sourceMappingURL=wiki-interface.js.map