UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

303 lines 16.3 kB
"use strict"; var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) { if (k2 === undefined) k2 = k; var desc = Object.getOwnPropertyDescriptor(m, k); if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) { desc = { enumerable: true, get: function() { return m[k]; } }; } Object.defineProperty(o, k2, desc); }) : (function(o, m, k, k2) { if (k2 === undefined) k2 = k; o[k2] = m[k]; })); var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) { Object.defineProperty(o, "default", { enumerable: true, value: v }); }) : function(o, v) { o["default"] = v; }); var __importStar = (this && this.__importStar) || (function () { var ownKeys = function(o) { ownKeys = Object.getOwnPropertyNames || function (o) { var ar = []; for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k; return ar; }; return ownKeys(o); }; return function (mod) { if (mod && mod.__esModule) return mod; var result = {}; if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]); __setModuleDefault(result, mod); return result; }; })(); var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? 
mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.summarizeSlicerStats = summarizeSlicerStats; exports.summarizeSummarizedMeasurement = summarizeSummarizedMeasurement; exports.summarizeSummarizedReductions = summarizeSummarizedReductions; exports.summarizeSummarizedTimePerToken = summarizeSummarizedTimePerToken; exports.summarizeTimePerToken = summarizeTimePerToken; const tmp = __importStar(require("tmp")); const fs_1 = __importDefault(require("fs")); const defaultmap_1 = require("../../../util/collections/defaultmap"); const log_1 = require("../../../util/log"); const strings_1 = require("../../../util/text/strings"); const summarizer_1 = require("../../../util/summarizer"); const assert_1 = require("../../../util/assert"); const shell_1 = require("../../../r-bridge/shell"); const retriever_1 = require("../../../r-bridge/retriever"); const visitor_1 = require("../../../r-bridge/lang-4.x/ast/model/processing/visitor"); const type_1 = require("../../../r-bridge/lang-4.x/ast/model/type"); const arrays_1 = require("../../../util/collections/arrays"); const tempfile = (() => { let _tempfile = undefined; return () => { if (_tempfile === undefined) { _tempfile = tmp.fileSync({ postfix: '.R', keep: false }); process.on('beforeExit', () => _tempfile?.removeCallback()); } return _tempfile; }; })(); function safeDivPercentage(a, b) { if (isNaN(a) || isNaN(b)) { return undefined; } else if (b === 0) { return a === 0 ? 0 : undefined; } else { const result = 1 - (a / b); if (isNaN(result)) { log_1.log.error(`NaN for ${a} and ${b}\n`); return undefined; } else { return result; } } } function calculateReductionForSlice(input, dataflow, perSlice, ignoreFluff) { const perSliceLines = ignoreFluff ? perSlice.nonEmptyLines : perSlice.lines; const inputLines = ignoreFluff ? 
input.numberOfNonEmptyLines : input.numberOfLines; return { numberOfLines: safeDivPercentage(perSliceLines, inputLines), numberOfLinesNoAutoSelection: safeDivPercentage(perSliceLines - perSlice.linesWithAutoSelected, inputLines), numberOfCharacters: ignoreFluff ? safeDivPercentage(perSlice.charactersNoComments, input.numberOfCharactersNoComments) : safeDivPercentage(perSlice.characters, input.numberOfCharacters), numberOfNonWhitespaceCharacters: ignoreFluff ? safeDivPercentage(perSlice.nonWhitespaceCharactersNoComments, input.numberOfNonWhitespaceCharactersNoComments) : safeDivPercentage(perSlice.nonWhitespaceCharacters, input.numberOfNonWhitespaceCharacters), numberOfRTokens: ignoreFluff ? safeDivPercentage(perSlice.tokensNoComments, input.numberOfRTokensNoComments) : safeDivPercentage(perSlice.tokens, input.numberOfRTokens), numberOfNormalizedTokens: ignoreFluff ? safeDivPercentage(perSlice.normalizedTokensNoComments, input.numberOfNormalizedTokensNoComments) : safeDivPercentage(perSlice.normalizedTokens, input.numberOfNormalizedTokens), numberOfDataflowNodes: safeDivPercentage(perSlice.dataflowNodes, dataflow.numberOfNodes) }; } /** * Summarizes the given stats by calculating the min, max, median, mean, and the standard deviation for each measurement. 
/**
 * Summarizes the given stats by calculating the min, max, median, mean, and the standard deviation for each measurement.
 * Re-parses every reconstructed slice through a fresh {@link RShell} session to count
 * R tokens / normalized tokens (with and without comments) and derives per-slice
 * reduction and time-per-token figures.
 * @param stats  - the per-file slicer statistics to summarize
 * @param report - optional progress callback, invoked once per slicing criterion
 * @see Slicer
 */
async function summarizeSlicerStats(stats, report = () => { }) {
    const collect = new defaultmap_1.DefaultMap(() => []);
    const sizeOfSliceCriteria = [];
    const reParseShellSession = new shell_1.RShell();
    const sliceTimes = [];
    const reconstructTimes = [];
    const totalTimes = [];
    const reductions = [];
    const reductionsNoFluff = [];
    let failedOutputs = 0;
    const sliceSize = {
        lines: [], nonEmptyLines: [], linesWithAutoSelected: [],
        characters: [], charactersNoComments: [],
        nonWhitespaceCharacters: [], nonWhitespaceCharactersNoComments: [],
        tokens: [], tokensNoComments: [],
        normalizedTokens: [], normalizedTokensNoComments: [],
        dataflowNodes: []
    };
    let timesHitThreshold = 0;
    try {
        for (const [criteria, perSliceStat] of stats.perSliceMeasurements) {
            report(criteria, perSliceStat);
            for (const measure of perSliceStat.measurements) {
                collect.get(measure[0]).push(Number(measure[1]));
            }
            sizeOfSliceCriteria.push(perSliceStat.slicingCriteria.length);
            timesHitThreshold += perSliceStat.timesHitThreshold > 0 ? 1 : 0;
            const { code: output, linesWithAutoSelected } = perSliceStat.reconstructedCode;
            sliceSize.linesWithAutoSelected.push(linesWithAutoSelected);
            const split = output.split('\n');
            const lines = split.length;
            const nonEmptyLines = split.filter(l => l.trim().length > 0).length;
            sliceSize.lines.push(lines);
            sliceSize.nonEmptyLines.push(nonEmptyLines);
            sliceSize.characters.push(output.length);
            const nonWhitespace = (0, strings_1.withoutWhitespace)(output).length;
            sliceSize.nonWhitespaceCharacters.push(nonWhitespace);
            // reparse the output to get the number of tokens
            try {
                // there seem to be encoding issues, therefore, we dump to a temp file
                fs_1.default.writeFileSync(tempfile().name, output);
                const reParsed = await (0, retriever_1.retrieveNormalizedAstFromRCode)({ request: 'file', content: tempfile().name }, reParseShellSession);
                let numberOfNormalizedTokens = 0;
                let numberOfNormalizedTokensNoComments = 0;
                let commentChars = 0;
                let commentCharsNoWhitespace = 0;
                (0, visitor_1.visitAst)(reParsed.ast, t => {
                    numberOfNormalizedTokens++;
                    const comments = t.info.additionalTokens?.filter(t => t.type === type_1.RType.Comment);
                    if (comments && comments.length > 0) {
                        const content = comments.map(c => c.lexeme ?? '').join('');
                        commentChars += content.length;
                        commentCharsNoWhitespace += (0, strings_1.withoutWhitespace)(content).length;
                    }
                    else {
                        numberOfNormalizedTokensNoComments++;
                    }
                    return false; // keep visiting the whole AST
                });
                sliceSize.normalizedTokens.push(numberOfNormalizedTokens);
                sliceSize.normalizedTokensNoComments.push(numberOfNormalizedTokensNoComments);
                sliceSize.charactersNoComments.push(output.length - commentChars);
                sliceSize.nonWhitespaceCharactersNoComments.push(nonWhitespace - commentCharsNoWhitespace);
                const numberOfRTokens = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(reParseShellSession);
                sliceSize.tokens.push(numberOfRTokens);
                const numberOfRTokensNoComments = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(reParseShellSession, true);
                sliceSize.tokensNoComments.push(numberOfRTokensNoComments);
                const perSlice = {
                    lines, nonEmptyLines,
                    characters: output.length,
                    charactersNoComments: output.length - commentChars,
                    nonWhitespaceCharacters: nonWhitespace,
                    nonWhitespaceCharactersNoComments: nonWhitespace - commentCharsNoWhitespace,
                    linesWithAutoSelected,
                    tokens: numberOfRTokens,
                    tokensNoComments: numberOfRTokensNoComments,
                    normalizedTokens: numberOfNormalizedTokens,
                    normalizedTokensNoComments: numberOfNormalizedTokensNoComments,
                    dataflowNodes: perSliceStat.numberOfDataflowNodesSliced
                };
                reductions.push(calculateReductionForSlice(stats.input, stats.dataflow, perSlice, false));
                reductionsNoFluff.push(calculateReductionForSlice(stats.input, stats.dataflow, perSlice, true));
                const sliceTime = Number(perSliceStat.measurements.get('static slicing'));
                const reconstructTime = Number(perSliceStat.measurements.get('reconstruct code'));
                sliceTimes.push({ raw: sliceTime / numberOfRTokens, normalized: sliceTime / numberOfNormalizedTokens });
                reconstructTimes.push({ raw: reconstructTime / numberOfRTokens, normalized: reconstructTime / numberOfNormalizedTokens });
                totalTimes.push({ raw: (sliceTime + reconstructTime) / numberOfRTokens, normalized: (sliceTime + reconstructTime) / numberOfNormalizedTokens });
            }
            catch {
                console.error(` ! Failed to re-parse the output of the slicer for ${JSON.stringify(criteria)}`); //, e
                console.error(` Code: ${output}\n`);
                failedOutputs++;
            }
            sliceSize.dataflowNodes.push(perSliceStat.numberOfDataflowNodesSliced);
        }
    }
    finally {
        // always release the R shell process, even if a report callback or a retriever throws
        reParseShellSession.close();
    }
    // summarize all measurements:
    const summarized = new Map();
    for (const [criterion, measurements] of collect.entries()) {
        summarized.set(criterion, (0, summarizer_1.summarizeMeasurement)(measurements));
    }
    return {
        ...stats,
        perSliceMeasurements: {
            numberOfSlices: stats.perSliceMeasurements.size,
            sliceCriteriaSizes: (0, summarizer_1.summarizeMeasurement)(sizeOfSliceCriteria),
            measurements: summarized,
            failedToRepParse: failedOutputs,
            timesHitThreshold,
            reduction: summarizeReductions(reductions),
            reductionNoFluff: summarizeReductions(reductionsNoFluff),
            sliceTimePerToken: summarizeTimePerToken(sliceTimes),
            reconstructTimePerToken: summarizeTimePerToken(reconstructTimes),
            totalPerSliceTimePerToken: summarizeTimePerToken(totalTimes),
            sliceSize: {
                lines: (0, summarizer_1.summarizeMeasurement)(sliceSize.lines),
                nonEmptyLines: (0, summarizer_1.summarizeMeasurement)(sliceSize.nonEmptyLines),
                characters: (0, summarizer_1.summarizeMeasurement)(sliceSize.characters),
                charactersNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.charactersNoComments),
                nonWhitespaceCharacters: (0, summarizer_1.summarizeMeasurement)(sliceSize.nonWhitespaceCharacters),
                nonWhitespaceCharactersNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.nonWhitespaceCharactersNoComments),
                linesWithAutoSelected: (0, summarizer_1.summarizeMeasurement)(sliceSize.linesWithAutoSelected),
                tokens: (0, summarizer_1.summarizeMeasurement)(sliceSize.tokens),
                tokensNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.tokensNoComments),
                normalizedTokens: (0, summarizer_1.summarizeMeasurement)(sliceSize.normalizedTokens),
                normalizedTokensNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.normalizedTokensNoComments),
                dataflowNodes: (0, summarizer_1.summarizeMeasurement)(sliceSize.dataflowNodes)
            }
        }
    };
}
/**
 * Collapses an array of already-summarized measurements into a single summary
 * (min of mins, max of maxes, median of medians, mean of means, combined std, summed total).
 * Entries that are entirely `undefined` are dropped up front.
 */
function summarizeSummarizedMeasurement(data) {
    data = data.filter(assert_1.isNotUndefined);
    const min = Math.min(...data.map(d => d.min).filter(assert_1.isNotUndefined));
    const max = Math.max(...data.map(d => d.max).filter(assert_1.isNotUndefined));
    // calculate median of medians (don't just average the median!)
    const medians = data.map(d => d.median).filter(assert_1.isNotUndefined).sort((a, b) => a - b);
    const median = medians[Math.floor(medians.length / 2)];
    // NOTE(review): mean and std divide by the unfiltered data.length — presumably
    // intentional (missing entries count as zero); confirm against the callers.
    const mean = (0, arrays_1.arraySum)(data.map(d => d.mean).filter(assert_1.isNotUndefined)) / data.length;
    // Method 1 of https://www.statology.org/averaging-standard-deviations/
    // Bug fix: filter out undefined stds BEFORE squaring. `undefined ** 2` is NaN, so the
    // original post-square `filter(isNotUndefined)` removed nothing and a single missing
    // std turned the whole combined std into NaN.
    const stds = data.map(d => d.std).filter(assert_1.isNotUndefined);
    const std = Math.sqrt((0, arrays_1.arraySum)(stds.map(s => s ** 2)) / data.length);
    const total = (0, arrays_1.arraySum)(data.map(d => d.total).filter(assert_1.isNotUndefined));
    return { min, max, median, mean, std, total };
}
/**
 * Summarizes one already-summarized reduction per metric into a single summary per metric.
 */
function summarizeSummarizedReductions(reductions) {
    // one re-summarization per reduction field
    const byField = (key) => summarizeSummarizedMeasurement(reductions.map(r => r[key]));
    return {
        numberOfDataflowNodes: byField('numberOfDataflowNodes'),
        numberOfLines: byField('numberOfLines'),
        numberOfCharacters: byField('numberOfCharacters'),
        numberOfNonWhitespaceCharacters: byField('numberOfNonWhitespaceCharacters'),
        numberOfLinesNoAutoSelection: byField('numberOfLinesNoAutoSelection'),
        numberOfNormalizedTokens: byField('numberOfNormalizedTokens'),
        numberOfRTokens: byField('numberOfRTokens')
    };
}
/**
 * Summarizes raw per-slice reductions (dropping undefined values) into one
 * summarized measurement per metric.
 */
function summarizeReductions(reductions) {
    // summarize one reduction field across all slices, skipping undefined entries
    const summarizeField = (key) => (0, summarizer_1.summarizeMeasurement)(reductions.map(r => r[key]).filter(assert_1.isNotUndefined));
    return {
        numberOfLines: summarizeField('numberOfLines'),
        numberOfLinesNoAutoSelection: summarizeField('numberOfLinesNoAutoSelection'),
        numberOfCharacters: summarizeField('numberOfCharacters'),
        numberOfNonWhitespaceCharacters: summarizeField('numberOfNonWhitespaceCharacters'),
        numberOfRTokens: summarizeField('numberOfRTokens'),
        numberOfNormalizedTokens: summarizeField('numberOfNormalizedTokens'),
        numberOfDataflowNodes: summarizeField('numberOfDataflowNodes')
    };
}
/**
 * Re-summarizes already-summarized raw/normalized time-per-token pairs.
 */
function summarizeSummarizedTimePerToken(times) {
    return {
        raw: summarizeSummarizedMeasurement(times.map(t => t.raw)),
        normalized: summarizeSummarizedMeasurement(times.map(t => t.normalized)),
    };
}
/**
 * Summarizes raw/normalized time-per-token pairs collected for each slice.
 */
function summarizeTimePerToken(times) {
    return {
        raw: (0, summarizer_1.summarizeMeasurement)(times.map(t => t.raw)),
        normalized: (0, summarizer_1.summarizeMeasurement)(times.map(t => t.normalized)),
    };
}
//# sourceMappingURL=process.js.map