@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
226 lines • 11.9 kB
JavaScript
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.PipelineExecutor = void 0;
const assert_1 = require("../util/assert");
/**
* **Please note:** The {@link PipelineExecutor} is now considered to be a rather low-level API for flowR. While it still works
* and is the basis for all other layers, we strongly recommend using the {@link FlowrAnalyzer} and its {@link FlowrAnalyzerBuilder|builder}
* to create and use an analyzer instance that is pre-configured for your use-case.
*
* The pipeline executor allows you to execute arbitrary {@link Pipeline|pipelines} in a step-by-step fashion.
* If you are not yet in the possession of a {@link Pipeline|pipeline}, you can use the {@link createPipeline} function
* to create one for yourself, based on the steps that you want to execute.
*
* Those steps are split into two phases or "stages" (which is the name that we will use in the following), represented
* by the {@link PipelineStepStage} type. These allow us to separate things that have to be done
* once per-file, e.g., actually parsing the AST, from those that we need to repeat 'once per request' (whatever this
* request may be). In other words, what can be cached between operations and what cannot.
*
* Furthermore, this executor follows an iterable fashion to be *as flexible as possible*
* (e.g., to be instrumented with measurements). So, you can use the pipeline executor like this:
*
* ```ts
* const stepper = new PipelineExecutor( ... )
* while(stepper.hasNextStep()) {
* await stepper.nextStep()
* }
*
* stepper.switchToRequestStage()
*
* while(stepper.hasNextStep()) {
* await stepper.nextStep()
* }
*
* const result = stepper.getResults()
* ```
*
* Of course, you might think that this is rather overkill if you simply want to receive the result.
* And this is true. Therefore, if you do not want to perform some kind of magic in-between steps, you can use the
* **{@link allRemainingSteps}** function like this:
*
* ```ts
* const stepper = new PipelineExecutor( ... )
* const result = await stepper.allRemainingSteps()
* ```
*
* As the name suggests, you can combine this function with previous calls to {@link nextStep} to only execute the remaining
* steps in case, for whatever reason, you only want to instrument some steps.
*
* By default, the {@link PipelineExecutor} does not offer an automatic way to repeat requests (mostly to prevent accidental errors).
* However, you can use the
* **{@link updateRequest}** function to reset the request steps and re-execute them for a new request. This allows something like the following:
*
* ```ts
* const stepper = new PipelineExecutor( ... )
* const result = await stepper.allRemainingSteps()
*
* stepper.updateRequest( ... )
* const result2 = await stepper.allRemainingSteps()
* ```
*
* **Example - Slicing With the Pipeline Executor**:
*
* Suppose you want to... you know, _slice_ a file (which was, at one point, the origin of flowR), then you can
* either create a pipeline yourself with the respective steps, or you can use the {@link DEFAULT_SLICING_PIPELINE} (and friends).
* With it, slicing essentially becomes 'easy-as-pie':
*
* ```ts
* const slicer = new PipelineExecutor(DEFAULT_SLICING_PIPELINE, {
* parser: new RShell(),
* // of course, the criterion and request given here are just examples, you can use whatever you want to slice!
* criterion: ['2@b'],
* request: requestFromInput('b <- 3; x <- 5\ncat(b)'),
* })
* const result = await slicer.allRemainingSteps()
* ```
*
* But now, we want to slice for `x` in the first line as well! We can do that by adding:
*
* ```ts
* slicer.updateRequest({ criterion: ['1@x'] })
* const result2 = await slicer.allRemainingSteps()
* ```
* @note Even though using the pipeline executor introduces a small performance overhead, we consider
* it to be the baseline for performance benchmarking. It may very well be possible to squeeze out a little bit more by
* directly constructing the steps in the right order. However, we consider this to be negligible when compared with the time required
* for, for example, the dataflow analysis of larger files.
* @see PipelineExecutor#allRemainingSteps
* @see PipelineExecutor#nextStep
*/
class PipelineExecutor {
    /** The pipeline whose steps are executed, in the order given by `pipeline.order`. */
    pipeline;
    /** Total number of steps, i.e., `pipeline.order.length` at construction time. */
    length;
    /** External input that is handed to every step processor; extended by {@link updateRequest}. */
    input;
    /** Results collected so far, keyed by the name of the step that produced them. */
    output = {};
    /** Stage the executor is currently in; it always starts in the once-per-file stage. */
    currentExecutionStage = 0 /* PipelineStepStage.OncePerFile */;
    /** Index into `pipeline.order` of the step that will be executed next. */
    stepCounter = 0;
    /**
     * Construct a new pipeline executor.
     * The required additional input is specified by the {@link IPipelineStep#requiredInput|required input configuration} of each step in the `pipeline`.
     *
     * Please see {@link createDataflowPipeline} and friends for engine agnostic shortcuts to create a pipeline executor.
     * And in general, please prefer using the {@link FlowrAnalyzer} and its {@link FlowrAnalyzerBuilder|builder} to create and use an analyzer instance.
     * @param pipeline - The {@link Pipeline} to execute, probably created with {@link createPipeline}.
     * @param input    - External {@link PipelineInput|configuration and input} required to execute the given pipeline.
     */
    constructor(pipeline, input) {
        this.pipeline = pipeline;
        this.length = pipeline.order.length;
        this.input = input;
    }
    /**
     * Retrieve the {@link Pipeline|pipeline} that is currently being executed.
     */
    getPipeline() {
        return this.pipeline;
    }
    /**
     * Retrieve the current {@link PipelineStepStage|stage} the pipeline executor is in.
     * @see currentExecutionStage
     * @see switchToRequestStage
     * @see PipelineStepStage
     */
    getCurrentStage() {
        return this.currentExecutionStage;
    }
    /**
     * Switch to the next {@link PipelineStepStage|stage} of the pipeline executor.
     *
     * This will fail if there is no next stage (i.e., the executor is already in the per-request stage),
     * or if a stage change is currently not valid because the step counter does not point
     * at the first per-request step (i.e., not all once-per-file steps have been executed).
     * @see PipelineExecutor
     * @see getCurrentStage
     */
    switchToRequestStage() {
        // Check the stage first: once we are in the per-request stage the step counter has usually moved
        // past `firstStepPerRequest`, and complaining about incomplete steps would hide the real cause.
        (0, assert_1.guard)(this.currentExecutionStage === 0 /* PipelineStepStage.OncePerFile */, 'Cannot switch to next stage, already in per-request stage.');
        (0, assert_1.guard)(this.stepCounter === this.pipeline.firstStepPerRequest, 'First need to complete all steps before switching');
        this.currentExecutionStage = 1 /* PipelineStepStage.OncePerRequest */;
    }
    /**
     * Returns the results of the pipeline.
     * @param intermediate - Normally you can only receive the results *after* the stepper completed the step of interest.
     *                       However, if you pass `true` to this parameter, you can also receive the results *before* the {@link PipelineExecutor|pipeline executor}
     *                       completed, although the typing system then cannot guarantee which of the steps have already happened.
     * @returns The (live, not copied) object that maps each step name to its stored result.
     */
    getResults(intermediate = false) {
        (0, assert_1.guard)(intermediate || this.stepCounter >= this.length, 'Without the intermediate flag, the pipeline must be completed before providing access to the results.');
        return this.output;
    }
    /**
     * Returns true only if
     * 1) there are more {@link IPipelineStep|steps} to-do for the current {@link PipelineStepStage|stage} and
     * 2) we have not yet reached the end of the {@link Pipeline|pipeline}.
     */
    hasNextStep() {
        const inRequestStage = this.currentExecutionStage !== 0 /* PipelineStepStage.OncePerFile */;
        return (this.stepCounter < this.length && inRequestStage)
            || this.stepCounter < this.pipeline.firstStepPerRequest;
    }
    /**
     * Execute the next {@link IPipelineStep|step} and return the name of the {@link IPipelineStep|step} that was executed,
     * so you can guard if the {@link IPipelineStep|step} differs from what you are interested in.
     * Furthermore, it returns the {@link IPipelineStep|step's} result.
     *
     * The result is additionally stored in the executor's output under the step's name, extended by a `.meta`
     * entry holding the elapsed time (difference of two `Date.now()` readings) as `timing`.
     * @param expectedStepName - A safeguard if you want to retrieve the result.
     *                           If given, it causes the execution to fail if the next step is not the one you expect.
     *
     * _Without `expectedStepName`, please refrain from accessing the result, as you have no safeguards if the pipeline changes._
     */
    async nextStep(expectedStepName) {
        const start = Date.now();
        const [step, result] = this._doNextStep(expectedStepName);
        // the processor may be synchronous or asynchronous, awaiting handles both
        const awaitedResult = await result;
        this.output[step] = { ...awaitedResult, '.meta': { timing: Date.now() - start } };
        // only advance after the step succeeded, a failing step can therefore be inspected or retried
        this.stepCounter++;
        return { name: step, result: awaitedResult };
    }
    /**
     * Looks up the step at the current position and invokes its processor with the results so far and the input.
     * Does not advance the step counter and does not store anything, this is left to {@link nextStep}.
     * @param expectedStepName - If given, the lookup fails unless the next step has exactly this name.
     * @returns A pair of the step's name and whatever its processor returned (possibly a promise).
     */
    _doNextStep(expectedStepName) {
        // fail with a telling message instead of reporting that the step `undefined` does not exist
        (0, assert_1.guard)(this.stepCounter < this.length, 'Cannot execute next step, the pipeline has already been completed.');
        const stepName = this.pipeline.order[this.stepCounter];
        const step = this.pipeline.steps.get(stepName);
        (0, assert_1.guard)(step !== undefined, () => `Cannot execute next step, step ${stepName} does not exist.`);
        if (expectedStepName !== undefined) {
            (0, assert_1.guard)(step.name === expectedStepName, () => `Cannot execute next step, expected step ${JSON.stringify(expectedStepName)} but got ${step.name}.`);
        }
        return [step.name, step.processor(this.output, this.input)];
    }
    /**
     * This only makes sense if you have already run a request and want to re-use the per-file results for a new one.
     * (or if for whatever reason, you did not pass information for the pipeline with the constructor).
     *
     * The new data is merged over the existing input (entries of `newRequestData` win), the step counter is
     * rewound to the first per-request step, and the stored results of all per-request steps are reset to `undefined`.
     * The current stage is left untouched.
     * @param newRequestData - Data for the new request
     */
    updateRequest(newRequestData) {
        const requestStep = this.pipeline.firstStepPerRequest;
        (0, assert_1.guard)(this.stepCounter >= requestStep, 'Cannot reset request prior to once-per-request stage');
        this.input = {
            ...this.input,
            ...newRequestData
        };
        this.stepCounter = requestStep;
        // clear the results for all steps with an index >= firstStepPerRequest, this is more of a sanity check
        for (let i = requestStep; i < this.length; i++) {
            this.output[this.pipeline.order[i]] = undefined;
        }
    }
    /**
     * Execute all remaining steps and automatically call {@link switchToRequestStage} if necessary.
     * @param canSwitchStage - If true, automatically switch to the request stage if necessary
     *                         (i.e., this is what you want if you have never executed {@link nextStep} and you want to execute *all* steps).
     *                         However, passing false allows you to only execute the steps of the 'once-per-file' stage (i.e., the steps that can be cached).
     * @returns The results object; if the pipeline is not yet completed afterwards, these are the intermediate results.
     * @note There is a small type difference if you pass 'false' and already have manually switched to the 'once-per-request' stage.
     *       Because now, the results of these steps are no longer part of the result type (although they are still included).
     *       In such a case, you may be better off with simply passing 'true' as the function will detect that the stage is already switched.
     *       We could solve this type problem by separating the {@link PipelineExecutor} class into two for each stage,
     *       but this would break the improved readability and unified handling of the executor that I wanted to achieve with this class.
     */
    async allRemainingSteps(canSwitchStage = true) {
        while (this.hasNextStep()) {
            await this.nextStep();
        }
        if (canSwitchStage && this.stepCounter < this.length && this.currentExecutionStage === 0 /* PipelineStepStage.OncePerFile */) {
            this.switchToRequestStage();
            while (this.hasNextStep()) {
                await this.nextStep();
            }
        }
        // an incomplete pipeline can only hand out intermediate results
        return this.getResults(this.stepCounter < this.length);
    }
}
exports.PipelineExecutor = PipelineExecutor;
//# sourceMappingURL=pipeline-executor.js.map