@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
361 lines • 17.1 kB
JavaScript
"use strict";
/**
 * Provides a top-level slicer that can be used to slice code *and* retrieve stats.
 * @module
 */
// TypeScript CommonJS interop helper (emitted by tsc): makes a required module's value
// reachable via `.default` regardless of whether it was transpiled from an ES module.
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
// standard tsc output: pre-declare the exported names; the real values are assigned below
exports.BenchmarkSlicer = exports.benchmarkLogger = void 0;
const stopwatch_1 = require("./stopwatch");
const fs_1 = __importDefault(require("fs"));
const log_1 = require("../util/log");
const assert_1 = require("../util/assert");
const strings_1 = require("../util/strings");
const default_pipelines_1 = require("../core/steps/pipeline/default-pipelines");
const retriever_1 = require("../r-bridge/retriever");
const collect_all_1 = require("../slicing/criterion/collect-all");
const type_1 = require("../r-bridge/lang-4.x/ast/model/type");
const visitor_1 = require("../r-bridge/lang-4.x/ast/model/processing/visitor");
const size_of_1 = require("./stats/size-of");
const shell_1 = require("../r-bridge/shell");
const tree_sitter_types_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-types");
const tree_sitter_executor_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-executor");
const vertex_1 = require("../dataflow/graph/vertex");
const arrays_1 = require("../util/arrays");
/**
 * The logger to be used for benchmarking as a global object.
 */
exports.benchmarkLogger = log_1.log.getSubLogger({ name: 'benchmark' });
class BenchmarkSlicer {
    /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
    commonMeasurements = new stopwatch_1.Measurements();
    /** Per-slice statistics, keyed by the exact criteria array passed to {@link slice} */
    perSliceMeasurements = new Map();
    /** Memory deltas (heap, rss, external, array buffers) recorded around each common step */
    deltas = new Map();
    /**
     * JSON-serialized criteria combinations that were already sliced. The Map above is keyed
     * by array *reference* (a fresh rest-array per call), so `Map#has` could never detect a
     * repeated combination — this set provides the value-based duplicate check instead.
     */
    #slicedCriteriaKeys = new Set();
    /** Which parser backend to use ('r-shell' or the tree-sitter based one) */
    parserName;
    /** Populated by {@link calculateStatsAfterInit}; `undefined` until {@link init} ran */
    stats;
    loadedXml;
    dataflow;
    normalizedAst;
    totalStopwatch;
    finished = false;
    // Yes, this is unclean, but we know that we assign the executor during the initialization and this saves us from having to check for nullability every time
    executor = null;
    parser = null;
    constructor(parserName) {
        // start the total stopwatch immediately so setup time is included
        this.totalStopwatch = this.commonMeasurements.start('total');
        this.parserName = parserName;
    }
    /**
     * Initialize the slicer on the given request.
     * Can only be called once for each instance.
     */
    async init(request, autoSelectIf, threshold) {
        (0, assert_1.guard)(this.stats === undefined, 'cannot initialize the slicer twice');
        // we know these are in sync so we just cast to one of them
        this.parser = await this.commonMeasurements.measure('initialize R session', async () => {
            if (this.parserName === 'r-shell') {
                return new shell_1.RShell();
            }
            else {
                await tree_sitter_executor_1.TreeSitterExecutor.initTreeSitter();
                return new tree_sitter_executor_1.TreeSitterExecutor();
            }
        });
        this.executor = (0, default_pipelines_1.createSlicePipeline)(this.parser, {
            request: { ...request },
            criterion: [],
            autoSelectIf,
            threshold,
        });
        // run the common (per-file) pipeline steps once; each is timed and its memory delta recorded
        this.loadedXml = (await this.measureCommonStep('parse', 'retrieve AST from R code')).parsed;
        this.normalizedAst = await this.measureCommonStep('normalize', 'normalize R AST');
        this.dataflow = await this.measureCommonStep('dataflow', 'produce dataflow information');
        this.executor.switchToRequestStage();
        await this.calculateStatsAfterInit(request);
    }
    /**
     * Collects input-, token- and dataflow-related statistics once {@link init} has produced
     * the parse, normalize and dataflow results, and stores them in {@link stats}.
     */
    async calculateStatsAfterInit(request) {
        const loadedContent = request.request === 'text' ? request.content : fs_1.default.readFileSync(request.content, 'utf-8');
        let numberOfRTokens;
        let numberOfRTokensNoComments;
        if (this.parser.name === 'r-shell') {
            // retrieve number of R tokens - flowr_parsed should still contain the last parsed code
            numberOfRTokens = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(this.parser);
            numberOfRTokensNoComments = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(this.parser, true);
        }
        else {
            // tree-sitter backend: count tokens by recursively walking the concrete syntax tree
            const countChildren = function (node, ignoreComments = false) {
                let ret = node.type === tree_sitter_types_1.TreeSitterType.Comment && ignoreComments ? 0 : 1;
                for (const child of node.children) {
                    ret += countChildren(child, ignoreComments);
                }
                return ret;
            };
            const root = this.loadedXml.rootNode;
            numberOfRTokens = countChildren(root);
            numberOfRTokensNoComments = countChildren(root, true);
        }
        (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined after initialization');
        (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined after initialization');
        // collect dataflow graph size
        const vertices = [...this.dataflow.graph.vertices(true)];
        let numberOfEdges = 0;
        let numberOfCalls = 0;
        let numberOfDefinitions = 0;
        for (const [n, info] of vertices) {
            const outgoingEdges = this.dataflow.graph.outgoingEdges(n);
            numberOfEdges += outgoingEdges?.size ?? 0;
            if (info.tag === 'function-call') {
                numberOfCalls++;
            }
            else if (info.tag === 'function-definition') {
                numberOfDefinitions++;
            }
        }
        // walk the normalized AST to count tokens and the characters of attached comments
        let nodes = 0;
        let nodesNoComments = 0;
        let commentChars = 0;
        let commentCharsNoWhitespace = 0;
        (0, visitor_1.visitAst)(this.normalizedAst.ast, t => {
            nodes++;
            const comments = t.info.additionalTokens?.filter(token => token.type === type_1.RType.Comment);
            if (comments && comments.length > 0) {
                const content = comments.map(c => c.lexeme ?? '').join('');
                commentChars += content.length;
                commentCharsNoWhitespace += (0, strings_1.withoutWhitespace)(content).length;
            }
            else {
                nodesNoComments++;
            }
            return false;
        });
        const storedVertexIndices = this.countStoredVertexIndices();
        const storedEnvIndices = this.countStoredEnvIndices();
        const overwrittenIndices = storedVertexIndices - storedEnvIndices;
        const split = loadedContent.split('\n');
        const nonWhitespace = (0, strings_1.withoutWhitespace)(loadedContent).length;
        this.stats = {
            perSliceMeasurements: this.perSliceMeasurements,
            memory: this.deltas,
            request,
            input: {
                numberOfLines: split.length,
                numberOfNonEmptyLines: split.filter(l => l.trim().length > 0).length,
                numberOfCharacters: loadedContent.length,
                numberOfCharactersNoComments: loadedContent.length - commentChars,
                numberOfNonWhitespaceCharacters: nonWhitespace,
                numberOfNonWhitespaceCharactersNoComments: nonWhitespace - commentCharsNoWhitespace,
                numberOfRTokens: numberOfRTokens,
                numberOfRTokensNoComments: numberOfRTokensNoComments,
                numberOfNormalizedTokens: nodes,
                numberOfNormalizedTokensNoComments: nodesNoComments
            },
            dataflow: {
                // reuse the vertex array collected above instead of re-materializing the iterator
                numberOfNodes: vertices.length,
                numberOfEdges: numberOfEdges,
                numberOfCalls: numberOfCalls,
                numberOfFunctionDefinitions: numberOfDefinitions,
                sizeOfObject: (0, size_of_1.getSizeOfDfGraph)(this.dataflow.graph),
                storedVertexIndices: storedVertexIndices,
                storedEnvIndices: storedEnvIndices,
                overwrittenIndices: overwrittenIndices,
            },
            // these are all properly initialized in finish()
            commonMeasurements: new Map(),
            retrieveTimePerToken: { raw: 0, normalized: 0 },
            normalizeTimePerToken: { raw: 0, normalized: 0 },
            dataflowTimePerToken: { raw: 0, normalized: 0 },
            totalCommonTimePerToken: { raw: 0, normalized: 0 }
        };
    }
    /**
     * Counts the number of stored indices in the dataflow graph created by the pointer analysis.
     */
    countStoredVertexIndices() {
        // countStoredIndices only iterates, so no defensive copy of `out` is needed
        return this.countStoredIndices(this.dataflow?.out ?? []);
    }
    /**
     * Counts the number of stored indices in the dataflow graph created by the pointer analysis.
     */
    countStoredEnvIndices() {
        const memory = this.dataflow?.environment.current.memory;
        // each memory entry holds an array of definitions; flatten one level before counting
        return this.countStoredIndices(memory ? [...memory.values()].flat() : []);
    }
    /**
     * Counts the number of stored indices in the passed definitions.
     */
    countStoredIndices(definitions) {
        let numberOfIndices = 0;
        for (const reference of definitions) {
            if (reference.indicesCollection) {
                numberOfIndices += this.countIndices(reference.indicesCollection);
            }
        }
        return numberOfIndices;
    }
    /**
     * Recursively counts the number of indices and sub-indices in the given collection.
     */
    countIndices(collection) {
        let numberOfIndices = 0;
        for (const indices of collection ?? []) {
            for (const index of indices.indices) {
                numberOfIndices++;
                if ((0, vertex_1.isParentContainerIndex)(index)) {
                    numberOfIndices += this.countIndices(index.subIndices);
                }
            }
        }
        return numberOfIndices;
    }
    /**
     * Slice for the given {@link SlicingCriteria}.
     * @see SingleSlicingCriterion
     *
     * @returns The per slice stats retrieved for this slicing criteria
     */
    async slice(...slicingCriteria) {
        exports.benchmarkLogger.trace(`try to slice for criteria ${JSON.stringify(slicingCriteria)}`);
        this.guardActive();
        // compare by value: `slicingCriteria` is a fresh array on every call, so a
        // reference-based Map lookup could never detect a repeated combination
        const criteriaKey = JSON.stringify(slicingCriteria);
        (0, assert_1.guard)(!this.#slicedCriteriaKeys.has(criteriaKey), 'do not slice the same criteria combination twice');
        this.#slicedCriteriaKeys.add(criteriaKey);
        const measurements = new stopwatch_1.Measurements();
        // stats skeleton; filled in as the slice and reconstruct steps complete
        const stats = {
            measurements: undefined,
            slicingCriteria: [],
            numberOfDataflowNodesSliced: 0,
            timesHitThreshold: 0,
            reconstructedCode: {
                code: '',
                linesWithAutoSelected: 0
            }
        };
        this.perSliceMeasurements.set(slicingCriteria, stats);
        this.executor.updateRequest({ criterion: slicingCriteria });
        const totalStopwatch = measurements.start('total');
        const slicedOutput = await this.measureSliceStep('slice', measurements, 'static slicing');
        stats.slicingCriteria = [...slicedOutput.decodedCriteria];
        stats.reconstructedCode = await this.measureSliceStep('reconstruct', measurements, 'reconstruct code');
        totalStopwatch.stop();
        exports.benchmarkLogger.debug(`Produced code for ${JSON.stringify(slicingCriteria)}: ${stats.reconstructedCode.code}`);
        const results = this.executor.getResults(false);
        if (exports.benchmarkLogger.settings.minLevel >= 3 /* LogLevel.Info */) {
            exports.benchmarkLogger.info(`mapped slicing criteria: ${slicedOutput.decodedCriteria.map(c => {
                const node = results.normalize.idMap.get(c.id);
                return `\n- id: ${c.id}, location: ${JSON.stringify(node?.location)}, lexeme: ${JSON.stringify(node?.lexeme)}`;
            }).join('')}`);
        }
        // if it is not in the dataflow graph it was kept to be safe and should not count to the included nodes
        stats.numberOfDataflowNodesSliced = [...slicedOutput.result].filter(id => results.dataflow.graph.hasVertex(id, false)).length;
        stats.timesHitThreshold = slicedOutput.timesHitThreshold;
        stats.measurements = measurements.get();
        return {
            stats,
            slice: slicedOutput,
            code: stats.reconstructedCode
        };
    }
    /** Bridging the gap between the new internal and the old names for the benchmarking */
    async measureCommonStep(expectedStep, keyToMeasure) {
        const memoryInit = process.memoryUsage();
        const { result } = await this.commonMeasurements.measureAsync(keyToMeasure, () => this.executor.nextStep(expectedStep));
        const memoryEnd = process.memoryUsage();
        // NOTE(review): GC is not forced between the snapshots, so these deltas are approximate
        this.deltas.set(keyToMeasure, {
            heap: memoryEnd.heapUsed - memoryInit.heapUsed,
            rss: memoryEnd.rss - memoryInit.rss,
            external: memoryEnd.external - memoryInit.external,
            buffs: memoryEnd.arrayBuffers - memoryInit.arrayBuffers
        });
        return result;
    }
    /** Runs one per-slice pipeline step, timing it with the given per-slice measurements. */
    async measureSliceStep(expectedStep, measure, keyToMeasure) {
        const { result } = await measure.measureAsync(keyToMeasure, () => this.executor.nextStep(expectedStep));
        return result;
    }
    /** Guards that {@link init} has run and {@link finish} has not been called yet. */
    guardActive() {
        (0, assert_1.guard)(this.stats !== undefined && !this.finished, 'need to call init before, and can not do after finish!');
    }
    /**
     * Call {@link slice} for all slicing criteria that match the given filter.
     * See {@link collectAllSlicingCriteria} for details.
     * <p>
     * the `report` function will be called *before* each *individual* slice is performed.
     *
     * @returns The number of slices that were produced, or the negated total criteria count
     *          if `maxSliceCount` was exceeded and slicing was cancelled
     *
     * @see collectAllSlicingCriteria
     * @see SlicingCriteriaFilter
     */
    async sliceForAll(filter, report = () => { }, options = {}) {
        const { sampleCount, maxSliceCount, sampleStrategy } = { sampleCount: -1, maxSliceCount: -1, sampleStrategy: 'random', ...options };
        this.guardActive();
        let count = 0;
        let allCriteria = [...(0, collect_all_1.collectAllSlicingCriteria)(this.normalizedAst.ast, filter)];
        // Cancel slicing if the number of slices exceeds the limit
        if (maxSliceCount > 0 && allCriteria.length > maxSliceCount) {
            return -allCriteria.length;
        }
        if (sampleCount > 0) {
            if (sampleStrategy === 'equidistant') {
                allCriteria = (0, arrays_1.equidistantSampling)(allCriteria, sampleCount, 'ceil');
            }
            else {
                // Fisher–Yates shuffle: unbiased, unlike sorting with a random comparator
                // (which violates the comparator contract and skews the sample)
                for (let i = allCriteria.length - 1; i > 0; i--) {
                    const j = Math.floor(Math.random() * (i + 1));
                    [allCriteria[i], allCriteria[j]] = [allCriteria[j], allCriteria[i]];
                }
                allCriteria.length = Math.min(allCriteria.length, sampleCount);
            }
        }
        for (const slicingCriteria of allCriteria) {
            report(count, allCriteria.length, allCriteria);
            await this.slice(...slicingCriteria);
            count++;
        }
        return count;
    }
    /**
     * Retrieves the final stats and closes the shell session.
     * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
     */
    finish() {
        (0, assert_1.guard)(this.stats !== undefined, 'need to call init before finish');
        if (!this.finished) {
            this.commonMeasurements.measure('close R session', () => this.parser.close());
            this.totalStopwatch.stop();
            this.finished = true;
        }
        this.stats.commonMeasurements = this.commonMeasurements.get();
        // derive per-token timings from the common step measurements recorded in init()
        const retrieveTime = Number(this.stats.commonMeasurements.get('retrieve AST from R code'));
        const normalizeTime = Number(this.stats.commonMeasurements.get('normalize R AST'));
        const dataflowTime = Number(this.stats.commonMeasurements.get('produce dataflow information'));
        this.stats.retrieveTimePerToken = {
            raw: retrieveTime / this.stats.input.numberOfRTokens,
            normalized: retrieveTime / this.stats.input.numberOfNormalizedTokens
        };
        this.stats.normalizeTimePerToken = {
            raw: normalizeTime / this.stats.input.numberOfRTokens,
            normalized: normalizeTime / this.stats.input.numberOfNormalizedTokens
        };
        this.stats.dataflowTimePerToken = {
            raw: dataflowTime / this.stats.input.numberOfRTokens,
            normalized: dataflowTime / this.stats.input.numberOfNormalizedTokens
        };
        this.stats.totalCommonTimePerToken = {
            raw: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfRTokens,
            normalized: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfNormalizedTokens
        };
        return {
            stats: this.stats,
            parse: typeof this.loadedXml === 'string' ? this.loadedXml : JSON.stringify(this.loadedXml),
            dataflow: this.dataflow,
            normalize: this.normalizedAst
        };
    }
    /**
     * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
     */
    ensureSessionClosed() {
        this.parser?.close();
    }
}
// export the class under the name pre-declared at the top of the module
exports.BenchmarkSlicer = BenchmarkSlicer;
//# sourceMappingURL=slicer.js.map