UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

361 lines 17.1 kB
"use strict";
/**
 * Provides a top-level slicer that can be used to slice code *and* retrieve stats.
 *
 * NOTE: this file is compiled output (see the sourceMappingURL below); it has been
 * reformatted from its minified form for readability. The stale source map no
 * longer matches the layout.
 * @module
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.BenchmarkSlicer = exports.benchmarkLogger = void 0;
const stopwatch_1 = require("./stopwatch");
const fs_1 = __importDefault(require("fs"));
const log_1 = require("../util/log");
const assert_1 = require("../util/assert");
const strings_1 = require("../util/strings");
const default_pipelines_1 = require("../core/steps/pipeline/default-pipelines");
const retriever_1 = require("../r-bridge/retriever");
const collect_all_1 = require("../slicing/criterion/collect-all");
const type_1 = require("../r-bridge/lang-4.x/ast/model/type");
const visitor_1 = require("../r-bridge/lang-4.x/ast/model/processing/visitor");
const size_of_1 = require("./stats/size-of");
const shell_1 = require("../r-bridge/shell");
const tree_sitter_types_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-types");
const tree_sitter_executor_1 = require("../r-bridge/lang-4.x/tree-sitter/tree-sitter-executor");
const vertex_1 = require("../dataflow/graph/vertex");
const arrays_1 = require("../util/arrays");
/**
 * The logger to be used for benchmarking as a global object.
 */
exports.benchmarkLogger = log_1.log.getSubLogger({ name: 'benchmark' });
/**
 * Drives a single parse → normalize → dataflow run and an arbitrary number of
 * slices on top of it, collecting timing and memory statistics along the way.
 * Lifecycle: construct → {@link init} (once) → {@link slice}/{@link sliceForAll}
 * (any number of times) → {@link finish}.
 */
class BenchmarkSlicer {
    /** Measures all data recorded *once* per slicer (complete setup up to the dataflow graph creation) */
    commonMeasurements = new stopwatch_1.Measurements();
    perSliceMeasurements = new Map();
    deltas = new Map();
    parserName;
    stats;
    loadedXml;
    dataflow;
    normalizedAst;
    totalStopwatch;
    finished = false;
    // Yes, this is unclean, but we know that we assign the executor during the initialization and this saves us from having to check for nullability every time
    executor = null;
    parser = null;
    /** @param parserName - either 'r-shell' (live R session) or the tree-sitter backend */
    constructor(parserName) {
        this.totalStopwatch = this.commonMeasurements.start('total');
        this.parserName = parserName;
    }
    /**
     * Initialize the slicer on the given request.
     * Can only be called once for each instance.
     */
    async init(request, autoSelectIf, threshold) {
        (0, assert_1.guard)(this.stats === undefined, 'cannot initialize the slicer twice');
        // we know these are in sync so we just cast to one of them
        this.parser = await this.commonMeasurements.measure('initialize R session', async () => {
            if (this.parserName === 'r-shell') {
                return new shell_1.RShell();
            }
            else {
                await tree_sitter_executor_1.TreeSitterExecutor.initTreeSitter();
                return new tree_sitter_executor_1.TreeSitterExecutor();
            }
        });
        this.executor = (0, default_pipelines_1.createSlicePipeline)(this.parser, {
            request:   { ...request },
            criterion: [],
            autoSelectIf,
            threshold,
        });
        // run the three common pipeline stages once, measuring each
        this.loadedXml = (await this.measureCommonStep('parse', 'retrieve AST from R code')).parsed;
        this.normalizedAst = await this.measureCommonStep('normalize', 'normalize R AST');
        this.dataflow = await this.measureCommonStep('dataflow', 'produce dataflow information');
        this.executor.switchToRequestStage();
        await this.calculateStatsAfterInit(request);
    }
    /** Collects the input/dataflow statistics once the common pipeline stages have run. */
    async calculateStatsAfterInit(request) {
        const loadedContent = request.request === 'text' ?
            request.content : fs_1.default.readFileSync(request.content, 'utf-8');
        let numberOfRTokens;
        let numberOfRTokensNoComments;
        if (this.parser.name === 'r-shell') {
            // retrieve number of R tokens - flowr_parsed should still contain the last parsed code
            numberOfRTokens = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(this.parser);
            numberOfRTokensNoComments = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(this.parser, true);
        }
        else {
            // tree-sitter backend: count nodes of the concrete syntax tree ourselves
            const countChildren = function (node, ignoreComments = false) {
                let ret = node.type === tree_sitter_types_1.TreeSitterType.Comment && ignoreComments ? 0 : 1;
                for (const child of node.children) {
                    ret += countChildren(child, ignoreComments);
                }
                return ret;
            };
            const root = this.loadedXml.rootNode;
            numberOfRTokens = countChildren(root);
            numberOfRTokensNoComments = countChildren(root, true);
        }
        (0, assert_1.guard)(this.normalizedAst !== undefined, 'normalizedAst should be defined after initialization');
        (0, assert_1.guard)(this.dataflow !== undefined, 'dataflow should be defined after initialization');
        // collect dataflow graph size
        const vertices = [...this.dataflow.graph.vertices(true)];
        let numberOfEdges = 0;
        let numberOfCalls = 0;
        let numberOfDefinitions = 0;
        for (const [n, info] of vertices) {
            const outgoingEdges = this.dataflow.graph.outgoingEdges(n);
            numberOfEdges += outgoingEdges?.size ?? 0;
            if (info.tag === 'function-call') {
                numberOfCalls++;
            }
            else if (info.tag === 'function-definition') {
                numberOfDefinitions++;
            }
        }
        // walk the normalized AST to count tokens and the characters taken up by comments
        let nodes = 0;
        let nodesNoComments = 0;
        let commentChars = 0;
        let commentCharsNoWhitespace = 0;
        (0, visitor_1.visitAst)(this.normalizedAst.ast, t => {
            nodes++;
            // renamed from `t` to `tok` to avoid shadowing the visited node
            const comments = t.info.additionalTokens?.filter(tok => tok.type === type_1.RType.Comment);
            if (comments && comments.length > 0) {
                const content = comments.map(c => c.lexeme ?? '').join('');
                commentChars += content.length;
                commentCharsNoWhitespace += (0, strings_1.withoutWhitespace)(content).length;
            }
            else {
                nodesNoComments++;
            }
            return false;
        });
        const storedVertexIndices = this.countStoredVertexIndices();
        const storedEnvIndices = this.countStoredEnvIndices();
        const overwrittenIndices = storedVertexIndices - storedEnvIndices;
        const split = loadedContent.split('\n');
        const nonWhitespace = (0, strings_1.withoutWhitespace)(loadedContent).length;
        this.stats = {
            perSliceMeasurements: this.perSliceMeasurements,
            memory:               this.deltas,
            request,
            input: {
                numberOfLines:                             split.length,
                numberOfNonEmptyLines:                     split.filter(l => l.trim().length > 0).length,
                numberOfCharacters:                        loadedContent.length,
                numberOfCharactersNoComments:              loadedContent.length - commentChars,
                numberOfNonWhitespaceCharacters:           nonWhitespace,
                numberOfNonWhitespaceCharactersNoComments: nonWhitespace - commentCharsNoWhitespace,
                numberOfRTokens:                           numberOfRTokens,
                numberOfRTokensNoComments:                 numberOfRTokensNoComments,
                numberOfNormalizedTokens:                  nodes,
                numberOfNormalizedTokensNoComments:        nodesNoComments
            },
            dataflow: {
                // reuse the already-materialized vertex array instead of spreading the iterator a second time
                numberOfNodes:               vertices.length,
                numberOfEdges:               numberOfEdges,
                numberOfCalls:               numberOfCalls,
                numberOfFunctionDefinitions: numberOfDefinitions,
                sizeOfObject:                (0, size_of_1.getSizeOfDfGraph)(this.dataflow.graph),
                storedVertexIndices:         storedVertexIndices,
                storedEnvIndices:            storedEnvIndices,
                overwrittenIndices:          overwrittenIndices,
            },
            // these are all properly initialized in finish()
            commonMeasurements:      new Map(),
            retrieveTimePerToken:    { raw: 0, normalized: 0 },
            normalizeTimePerToken:   { raw: 0, normalized: 0 },
            dataflowTimePerToken:    { raw: 0, normalized: 0 },
            totalCommonTimePerToken: { raw: 0, normalized: 0 }
        };
    }
    /**
     * Counts the number of stored indices in the dataflow graph created by the pointer analysis.
     */
    countStoredVertexIndices() {
        return this.countStoredIndices(this.dataflow?.out.map(ref => ref) ?? []);
    }
    /**
     * Counts the number of stored indices in the dataflow graph created by the pointer analysis.
     */
    countStoredEnvIndices() {
        return this.countStoredIndices(this.dataflow?.environment.current.memory.values()
            ?.flatMap(def => def)
            .map(def => def) ?? []);
    }
    /**
     * Counts the number of stored indices in the passed definitions.
     */
    countStoredIndices(definitions) {
        let numberOfIndices = 0;
        for (const reference of definitions) {
            if (reference.indicesCollection) {
                numberOfIndices += this.countIndices(reference.indicesCollection);
            }
        }
        return numberOfIndices;
    }
    /**
     * Recursively counts the number of indices and sub-indices in the given collection.
     */
    countIndices(collection) {
        let numberOfIndices = 0;
        for (const indices of collection ?? []) {
            for (const index of indices.indices) {
                numberOfIndices++;
                if ((0, vertex_1.isParentContainerIndex)(index)) {
                    numberOfIndices += this.countIndices(index.subIndices);
                }
            }
        }
        return numberOfIndices;
    }
    /**
     * Slice for the given {@link SlicingCriteria}.
     * @see SingleSlicingCriterion
     *
     * @returns The per slice stats retrieved for this slicing criteria
     */
    async slice(...slicingCriteria) {
        exports.benchmarkLogger.trace(`try to slice for criteria ${JSON.stringify(slicingCriteria)}`);
        this.guardActive();
        (0, assert_1.guard)(!this.perSliceMeasurements.has(slicingCriteria), 'do not slice the same criteria combination twice');
        const measurements = new stopwatch_1.Measurements();
        const stats = {
            measurements:                undefined,
            slicingCriteria:             [],
            numberOfDataflowNodesSliced: 0,
            timesHitThreshold:           0,
            reconstructedCode:           { code: '', linesWithAutoSelected: 0 }
        };
        this.perSliceMeasurements.set(slicingCriteria, stats);
        this.executor.updateRequest({ criterion: slicingCriteria });
        const totalStopwatch = measurements.start('total');
        const slicedOutput = await this.measureSliceStep('slice', measurements, 'static slicing');
        stats.slicingCriteria = [...slicedOutput.decodedCriteria];
        stats.reconstructedCode = await this.measureSliceStep('reconstruct', measurements, 'reconstruct code');
        totalStopwatch.stop();
        exports.benchmarkLogger.debug(`Produced code for ${JSON.stringify(slicingCriteria)}: ${stats.reconstructedCode.code}`);
        const results = this.executor.getResults(false);
        // NOTE(review): with tslog a *lower* minLevel usually means *more* verbose;
        // `>= 3` looks inverted for "is info enabled" — confirm against flowr's LogLevel ordering
        if (exports.benchmarkLogger.settings.minLevel >= 3 /* LogLevel.Info */) {
            exports.benchmarkLogger.info(`mapped slicing criteria: ${slicedOutput.decodedCriteria.map(c => {
                const node = results.normalize.idMap.get(c.id);
                return `\n- id: ${c.id}, location: ${JSON.stringify(node?.location)}, lexeme: ${JSON.stringify(node?.lexeme)}`;
            }).join('')}`);
        }
        // if it is not in the dataflow graph it was kept to be safe and should not count to the included nodes
        stats.numberOfDataflowNodesSliced = [...slicedOutput.result].filter(id => results.dataflow.graph.hasVertex(id, false)).length;
        stats.timesHitThreshold = slicedOutput.timesHitThreshold;
        stats.measurements = measurements.get();
        return { stats, slice: slicedOutput, code: stats.reconstructedCode };
    }
    /** Bridging the gap between the new internal and the old names for the benchmarking */
    async measureCommonStep(expectedStep, keyToMeasure) {
        // sample memory before/after the step so we can report per-step deltas
        const memoryInit = process.memoryUsage();
        const { result } = await this.commonMeasurements.measureAsync(keyToMeasure, () => this.executor.nextStep(expectedStep));
        const memoryEnd = process.memoryUsage();
        this.deltas.set(keyToMeasure, {
            heap:     memoryEnd.heapUsed - memoryInit.heapUsed,
            rss:      memoryEnd.rss - memoryInit.rss,
            external: memoryEnd.external - memoryInit.external,
            buffs:    memoryEnd.arrayBuffers - memoryInit.arrayBuffers
        });
        return result;
    }
    /** Runs the given pipeline step, recording its duration under `keyToMeasure`. */
    async measureSliceStep(expectedStep, measure, keyToMeasure) {
        const { result } = await measure.measureAsync(keyToMeasure, () => this.executor.nextStep(expectedStep));
        return result;
    }
    guardActive() {
        (0, assert_1.guard)(this.stats !== undefined && !this.finished, 'need to call init before, and can not do after finish!');
    }
    /**
     * Call {@link slice} for all slicing criteria that match the given filter.
     * See {@link collectAllSlicingCriteria} for details.
     * <p>
     * the `report` function will be called *before* each *individual* slice is performed.
     *
     * @returns The number of slices that were produced
     *
     * @see collectAllSlicingCriteria
     * @see SlicingCriteriaFilter
     */
    async sliceForAll(filter, report = () => { }, options = {}) {
        const { sampleCount, maxSliceCount, sampleStrategy } = { sampleCount: -1, maxSliceCount: -1, sampleStrategy: 'random', ...options };
        this.guardActive();
        let count = 0;
        let allCriteria = [...(0, collect_all_1.collectAllSlicingCriteria)(this.normalizedAst.ast, filter)];
        // Cancel slicing if the number of slices exceeds the limit
        if (maxSliceCount > 0 && allCriteria.length > maxSliceCount) {
            return -allCriteria.length;
        }
        if (sampleCount > 0) {
            if (sampleStrategy === 'equidistant') {
                allCriteria = (0, arrays_1.equidistantSampling)(allCriteria, sampleCount, 'ceil');
            }
            else {
                // Partial Fisher-Yates shuffle: draw a uniform random sample of `sampleCount`
                // criteria. The previous `sort(() => Math.random() - 0.5)` used an inconsistent
                // comparator, which is implementation-defined and yields a biased sample.
                const take = Math.min(allCriteria.length, sampleCount);
                for (let i = 0; i < take; i++) {
                    const j = i + Math.floor(Math.random() * (allCriteria.length - i));
                    [allCriteria[i], allCriteria[j]] = [allCriteria[j], allCriteria[i]];
                }
                allCriteria.length = take;
            }
        }
        for (const slicingCriteria of allCriteria) {
            report(count, allCriteria.length, allCriteria);
            await this.slice(...slicingCriteria);
            count++;
        }
        return count;
    }
    /**
     * Retrieves the final stats and closes the shell session.
     * Can be called multiple times to retrieve the stored stats, but will only close the session once (the first time).
     */
    finish() {
        (0, assert_1.guard)(this.stats !== undefined, 'need to call init before finish');
        if (!this.finished) {
            this.commonMeasurements.measure('close R session', () => this.parser.close());
            this.totalStopwatch.stop();
            this.finished = true;
        }
        this.stats.commonMeasurements = this.commonMeasurements.get();
        // derive per-token timings from the three common stages measured in init()
        const retrieveTime = Number(this.stats.commonMeasurements.get('retrieve AST from R code'));
        const normalizeTime = Number(this.stats.commonMeasurements.get('normalize R AST'));
        const dataflowTime = Number(this.stats.commonMeasurements.get('produce dataflow information'));
        this.stats.retrieveTimePerToken = {
            raw:        retrieveTime / this.stats.input.numberOfRTokens,
            normalized: retrieveTime / this.stats.input.numberOfNormalizedTokens
        };
        this.stats.normalizeTimePerToken = {
            raw:        normalizeTime / this.stats.input.numberOfRTokens,
            normalized: normalizeTime / this.stats.input.numberOfNormalizedTokens
        };
        this.stats.dataflowTimePerToken = {
            raw:        dataflowTime / this.stats.input.numberOfRTokens,
            normalized: dataflowTime / this.stats.input.numberOfNormalizedTokens
        };
        this.stats.totalCommonTimePerToken = {
            raw:        (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfRTokens,
            normalized: (retrieveTime + normalizeTime + dataflowTime) / this.stats.input.numberOfNormalizedTokens
        };
        return {
            stats:     this.stats,
            parse:     typeof this.loadedXml === 'string' ? this.loadedXml : JSON.stringify(this.loadedXml),
            dataflow:  this.dataflow,
            normalize: this.normalizedAst
        };
    }
    /**
     * Only call in case of an error - if the session must be closed and the benchmark itself is to be considered failed/dead.
     */
    ensureSessionClosed() {
        this.parser?.close();
    }
}
exports.BenchmarkSlicer = BenchmarkSlicer;
//# sourceMappingURL=slicer.js.map