// @eagleoutice/flowr — Static Dataflow Analyzer and Program Slicer for the R Programming Language
// (compiled output; see process.js.map for the original TypeScript source)
;
// TypeScript-emitted helper: re-exports property `k` of module `m` on object `o`
// under the name `k2` (defaulting to `k`). With `Object.create` available it
// installs a live getter so the binding tracks later changes to the source
// module; otherwise it falls back to a one-time snapshot copy.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
var desc = Object.getOwnPropertyDescriptor(m, k);
// Re-wrap with a getter unless the source already exposes a plain data
// property that is safe to reuse (writable or configurable).
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
desc = { enumerable: true, get: function() { return m[k]; } };
}
Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
if (k2 === undefined) k2 = k;
o[k2] = m[k];
}));
/**
 * TypeScript-emitted helper: attaches `v` as the `default` export of the
 * namespace object `o`. Prefers a non-writable, enumerable data property;
 * falls back to plain assignment on engines without `Object.create`.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function(o, v) {
        // Modern engines: define `default` as a read-only enumerable property.
        Object.defineProperty(o, "default", { enumerable: true, value: v });
    }
    : function(o, v) {
        // Legacy fallback: simple assignment.
        o["default"] = v;
    });
// TypeScript-emitted helper: converts a CommonJS module into an ES-module-style
// namespace object. Genuine ES modules (`__esModule` set) pass through
// unchanged; otherwise every own key except "default" is re-bound onto a fresh
// object and the module itself becomes its `default` export.
var __importStar = (this && this.__importStar) || (function () {
// Lazily picks the key-enumeration strategy on first call and caches it by
// overwriting `ownKeys` with the chosen implementation.
var ownKeys = function(o) {
ownKeys = Object.getOwnPropertyNames || function (o) {
var ar = [];
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
return ar;
};
return ownKeys(o);
};
return function (mod) {
if (mod && mod.__esModule) return mod;
var result = {};
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
__setModuleDefault(result, mod);
return result;
};
})();
/**
 * TypeScript-emitted helper: normalizes a required module for default-import
 * use. ES modules are returned unchanged; anything else is wrapped so the
 * original value becomes the `default` export.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
// CommonJS export wiring generated from the original TypeScript `export`
// statements. Note: `summarizeReductions` (below) is intentionally module-private.
Object.defineProperty(exports, "__esModule", { value: true });
exports.summarizeSlicerStats = summarizeSlicerStats;
exports.summarizeSummarizedMeasurement = summarizeSummarizedMeasurement;
exports.summarizeSummarizedReductions = summarizeSummarizedReductions;
exports.summarizeSummarizedTimePerToken = summarizeSummarizedTimePerToken;
exports.summarizeTimePerToken = summarizeTimePerToken;
const tmp = __importStar(require("tmp"));
const fs_1 = __importDefault(require("fs"));
const defaultmap_1 = require("../../../util/defaultmap");
const log_1 = require("../../../util/log");
const strings_1 = require("../../../util/strings");
const summarizer_1 = require("../../../util/summarizer");
const assert_1 = require("../../../util/assert");
const shell_1 = require("../../../r-bridge/shell");
const retriever_1 = require("../../../r-bridge/retriever");
const visitor_1 = require("../../../r-bridge/lang-4.x/ast/model/processing/visitor");
const type_1 = require("../../../r-bridge/lang-4.x/ast/model/type");
const arrays_1 = require("../../../util/arrays");
/**
 * Lazily-created, shared temporary `.R` file used to round-trip reconstructed
 * slices through R. The file is created on first access only, and its removal
 * callback is registered once so the file is deleted when the event loop drains.
 */
const tempfile = (() => {
    let handle;
    return () => {
        if (handle === undefined) {
            handle = tmp.fileSync({ postfix: '.R', keep: false });
            // Clean up the temp file just before the process exits.
            process.on('beforeExit', () => handle?.removeCallback());
        }
        return handle;
    };
})();
/**
 * Computes the relative reduction `1 - a/b`, guarding against undefined math.
 *
 * @param a - size of the slice (numerator)
 * @param b - size of the full program (denominator)
 * @returns the reduction as a fraction, `0` for the `0/0` case (an empty
 *          program yields no reduction), or `undefined` when the value cannot
 *          be computed (NaN inputs, non-zero over zero, or a NaN result).
 */
function safeDivPercentage(a, b) {
    // Guard clauses: propagate "unknown" instead of producing NaN downstream.
    if (isNaN(a) || isNaN(b)) {
        return undefined;
    }
    if (b === 0) {
        return a === 0 ? 0 : undefined;
    }
    const reduction = 1 - (a / b);
    if (isNaN(reduction)) {
        // e.g. Infinity / Infinity — log for diagnosis but do not throw.
        log_1.log.error(`NaN for ${a} and ${b}\n`);
        return undefined;
    }
    return reduction;
}
/**
 * Computes, for a single slice, the relative reduction achieved against the
 * full input program across every size measure (lines, characters, tokens,
 * dataflow nodes). With `ignoreFluff` set, the comment-free / non-empty-line
 * variants of each measure are compared instead of the raw counts.
 */
function calculateReductionForSlice(input, dataflow, perSlice, ignoreFluff) {
    const sliceLines = ignoreFluff ? perSlice.nonEmptyLines : perSlice.lines;
    const totalLines = ignoreFluff ? input.numberOfNonEmptyLines : input.numberOfLines;
    // Pick the comment-free counters when fluff is ignored, raw counters otherwise.
    const characters = ignoreFluff
        ? safeDivPercentage(perSlice.charactersNoComments, input.numberOfCharactersNoComments)
        : safeDivPercentage(perSlice.characters, input.numberOfCharacters);
    const nonWhitespaceCharacters = ignoreFluff
        ? safeDivPercentage(perSlice.nonWhitespaceCharactersNoComments, input.numberOfNonWhitespaceCharactersNoComments)
        : safeDivPercentage(perSlice.nonWhitespaceCharacters, input.numberOfNonWhitespaceCharacters);
    const rTokens = ignoreFluff
        ? safeDivPercentage(perSlice.tokensNoComments, input.numberOfRTokensNoComments)
        : safeDivPercentage(perSlice.tokens, input.numberOfRTokens);
    const normalizedTokens = ignoreFluff
        ? safeDivPercentage(perSlice.normalizedTokensNoComments, input.numberOfNormalizedTokensNoComments)
        : safeDivPercentage(perSlice.normalizedTokens, input.numberOfNormalizedTokens);
    return {
        numberOfLines: safeDivPercentage(sliceLines, totalLines),
        // auto-selected lines (e.g. library loads) are not credited to the slicer
        numberOfLinesNoAutoSelection: safeDivPercentage(sliceLines - perSlice.linesWithAutoSelected, totalLines),
        numberOfCharacters: characters,
        numberOfNonWhitespaceCharacters: nonWhitespaceCharacters,
        numberOfRTokens: rTokens,
        numberOfNormalizedTokens: normalizedTokens,
        numberOfDataflowNodes: safeDivPercentage(perSlice.dataflowNodes, dataflow.numberOfNodes)
    };
}
/**
 * Summarizes the given stats by calculating the min, max, median, mean, and the standard deviation for each measurement.
 * Every reconstructed slice is re-parsed through a dedicated R session to count
 * R tokens and normalized AST tokens, from which per-slice reductions and
 * time-per-token figures are derived.
 * @see Slicer
 */
async function summarizeSlicerStats(stats, report = () => {
}) {
// raw values per measurement name, gathered across all slices
const collect = new defaultmap_1.DefaultMap(() => []);
const sizeOfSliceCriteria = [];
// dedicated R session, used only to re-parse the reconstructed slices below
const reParseShellSession = new shell_1.RShell();
const sliceTimes = [];
const reconstructTimes = [];
const totalTimes = [];
const reductions = [];
const reductionsNoFluff = [];
let failedOutputs = 0;
// parallel arrays, one per size measure; each successfully re-parsed slice appends one entry
const sliceSize = {
lines: [],
nonEmptyLines: [],
linesWithAutoSelected: [],
characters: [],
charactersNoComments: [],
nonWhitespaceCharacters: [],
nonWhitespaceCharactersNoComments: [],
tokens: [],
tokensNoComments: [],
normalizedTokens: [],
normalizedTokensNoComments: [],
dataflowNodes: []
};
let timesHitThreshold = 0;
for (const [criteria, perSliceStat] of stats.perSliceMeasurements) {
// let the caller observe per-criterion progress
report(criteria, perSliceStat);
for (const measure of perSliceStat.measurements) {
// measurements appear to store bigint timings; Number() makes them summarizable — TODO confirm
collect.get(measure[0]).push(Number(measure[1]));
}
sizeOfSliceCriteria.push(perSliceStat.slicingCriteria.length);
// count slices (not occurrences) that hit the threshold at least once
timesHitThreshold += perSliceStat.timesHitThreshold > 0 ? 1 : 0;
const { code: output, linesWithAutoSelected } = perSliceStat.reconstructedCode;
sliceSize.linesWithAutoSelected.push(linesWithAutoSelected);
const split = output.split('\n');
const lines = split.length;
const nonEmptyLines = split.filter(l => l.trim().length > 0).length;
sliceSize.lines.push(lines);
sliceSize.nonEmptyLines.push(nonEmptyLines);
sliceSize.characters.push(output.length);
const nonWhitespace = (0, strings_1.withoutWhitespace)(output).length;
sliceSize.nonWhitespaceCharacters.push(nonWhitespace);
// reparse the output to get the number of tokens
try {
// there seem to be encoding issues, therefore, we dump to a temp file
fs_1.default.writeFileSync(tempfile().name, output);
const reParsed = await (0, retriever_1.retrieveNormalizedAstFromRCode)({ request: 'file', content: tempfile().name }, reParseShellSession);
let numberOfNormalizedTokens = 0;
let numberOfNormalizedTokensNoComments = 0;
let commentChars = 0;
let commentCharsNoWhitespace = 0;
// walk the normalized AST: count every token and tally the comment text attached to tokens
(0, visitor_1.visitAst)(reParsed.ast, t => {
numberOfNormalizedTokens++;
const comments = t.info.additionalTokens?.filter(t => t.type === type_1.RType.Comment);
if (comments && comments.length > 0) {
const content = comments.map(c => c.lexeme ?? '').join('');
commentChars += content.length;
commentCharsNoWhitespace += (0, strings_1.withoutWhitespace)(content).length;
}
else {
// token carries no comment, so it counts toward the comment-free total
numberOfNormalizedTokensNoComments++;
}
// presumably `false` means "continue traversal" — TODO confirm visitor contract
return false;
});
sliceSize.normalizedTokens.push(numberOfNormalizedTokens);
sliceSize.normalizedTokensNoComments.push(numberOfNormalizedTokensNoComments);
// comment-free character counts are derived by subtracting the tallied comment text
sliceSize.charactersNoComments.push(output.length - commentChars);
sliceSize.nonWhitespaceCharactersNoComments.push(nonWhitespace - commentCharsNoWhitespace);
// R token counts refer to the session's most recent parse — i.e. the slice parsed just above
const numberOfRTokens = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(reParseShellSession);
sliceSize.tokens.push(numberOfRTokens);
const numberOfRTokensNoComments = await (0, retriever_1.retrieveNumberOfRTokensOfLastParse)(reParseShellSession, true);
sliceSize.tokensNoComments.push(numberOfRTokensNoComments);
// snapshot of every size measure for this slice, consumed by the reduction calculation
const perSlice = {
lines: lines,
nonEmptyLines: nonEmptyLines,
characters: output.length,
charactersNoComments: output.length - commentChars,
nonWhitespaceCharacters: nonWhitespace,
nonWhitespaceCharactersNoComments: nonWhitespace - commentCharsNoWhitespace,
linesWithAutoSelected: linesWithAutoSelected,
tokens: numberOfRTokens,
tokensNoComments: numberOfRTokensNoComments,
normalizedTokens: numberOfNormalizedTokens,
normalizedTokensNoComments: numberOfNormalizedTokensNoComments,
dataflowNodes: perSliceStat.numberOfDataflowNodesSliced
};
reductions.push(calculateReductionForSlice(stats.input, stats.dataflow, perSlice, false));
reductionsNoFluff.push(calculateReductionForSlice(stats.input, stats.dataflow, perSlice, true));
const sliceTime = Number(perSliceStat.measurements.get('static slicing'));
const reconstructTime = Number(perSliceStat.measurements.get('reconstruct code'));
// normalize timings by slice size so differently sized slices stay comparable
sliceTimes.push({
raw: sliceTime / numberOfRTokens,
normalized: sliceTime / numberOfNormalizedTokens
});
reconstructTimes.push({
raw: reconstructTime / numberOfRTokens,
normalized: reconstructTime / numberOfNormalizedTokens
});
totalTimes.push({
raw: (sliceTime + reconstructTime) / numberOfRTokens,
normalized: (sliceTime + reconstructTime) / numberOfNormalizedTokens
});
}
catch {
// best effort: a slice R cannot re-parse is counted as failed but does not abort the summary
console.error(` ! Failed to re-parse the output of the slicer for ${JSON.stringify(criteria)}`); //, e
console.error(` Code: ${output}\n`);
failedOutputs++;
}
// recorded outside the try: dataflow-node counts do not depend on the re-parse
sliceSize.dataflowNodes.push(perSliceStat.numberOfDataflowNodesSliced);
}
// summarize all measurements:
const summarized = new Map();
for (const [criterion, measurements] of collect.entries()) {
summarized.set(criterion, (0, summarizer_1.summarizeMeasurement)(measurements));
}
// the re-parse session is no longer needed once every slice has been processed
reParseShellSession.close();
return {
...stats,
perSliceMeasurements: {
numberOfSlices: stats.perSliceMeasurements.size,
sliceCriteriaSizes: (0, summarizer_1.summarizeMeasurement)(sizeOfSliceCriteria),
measurements: summarized,
failedToRepParse: failedOutputs,
timesHitThreshold,
reduction: summarizeReductions(reductions),
reductionNoFluff: summarizeReductions(reductionsNoFluff),
sliceTimePerToken: summarizeTimePerToken(sliceTimes),
reconstructTimePerToken: summarizeTimePerToken(reconstructTimes),
totalPerSliceTimePerToken: summarizeTimePerToken(totalTimes),
sliceSize: {
lines: (0, summarizer_1.summarizeMeasurement)(sliceSize.lines),
nonEmptyLines: (0, summarizer_1.summarizeMeasurement)(sliceSize.nonEmptyLines),
characters: (0, summarizer_1.summarizeMeasurement)(sliceSize.characters),
charactersNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.charactersNoComments),
nonWhitespaceCharacters: (0, summarizer_1.summarizeMeasurement)(sliceSize.nonWhitespaceCharacters),
nonWhitespaceCharactersNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.nonWhitespaceCharactersNoComments),
linesWithAutoSelected: (0, summarizer_1.summarizeMeasurement)(sliceSize.linesWithAutoSelected),
tokens: (0, summarizer_1.summarizeMeasurement)(sliceSize.tokens),
tokensNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.tokensNoComments),
normalizedTokens: (0, summarizer_1.summarizeMeasurement)(sliceSize.normalizedTokens),
normalizedTokensNoComments: (0, summarizer_1.summarizeMeasurement)(sliceSize.normalizedTokensNoComments),
dataflowNodes: (0, summarizer_1.summarizeMeasurement)(sliceSize.dataflowNodes)
}
}
};
}
/**
 * Combines several already-summarized measurements into a single summary:
 * overall min/max, the median of medians, the mean of means, a pooled standard
 * deviation, and the grand total. Entries that are entirely `undefined` are
 * dropped up front; individual `undefined` fields are skipped per statistic.
 */
function summarizeSummarizedMeasurement(data) {
    data = data.filter(assert_1.isNotUndefined);
    const min = Math.min(...data.map(d => d.min).filter(assert_1.isNotUndefined));
    const max = Math.max(...data.map(d => d.max).filter(assert_1.isNotUndefined));
    // calculate median of medians (don't just average the median!)
    const medians = data.map(d => d.median).filter(assert_1.isNotUndefined).sort((a, b) => a - b);
    const median = medians[Math.floor(medians.length / 2)];
    const mean = (0, arrays_1.arraySum)(data.map(d => d.mean).filter(assert_1.isNotUndefined)) / data.length;
    // Method 1 of https://www.statology.org/averaging-standard-deviations/
    // Bug fix: filter undefined *before* squaring — `undefined ** 2` is NaN,
    // which slipped past `isNotUndefined` and poisoned the variance sum.
    const std = Math.sqrt((0, arrays_1.arraySum)(data.map(d => d.std).filter(assert_1.isNotUndefined).map(s => s ** 2)) / data.length);
    const total = (0, arrays_1.arraySum)(data.map(d => d.total).filter(assert_1.isNotUndefined));
    return { min, max, median, mean, std, total };
}
/**
 * Summarizes a list of already-summarized reduction records field by field,
 * producing one combined summary per reduction measure.
 */
function summarizeSummarizedReductions(reductions) {
    // Fold one named field across all reduction records into a single summary.
    const summarizeField = (key) => summarizeSummarizedMeasurement(reductions.map(r => r[key]));
    return {
        numberOfDataflowNodes: summarizeField('numberOfDataflowNodes'),
        numberOfLines: summarizeField('numberOfLines'),
        numberOfCharacters: summarizeField('numberOfCharacters'),
        numberOfNonWhitespaceCharacters: summarizeField('numberOfNonWhitespaceCharacters'),
        numberOfLinesNoAutoSelection: summarizeField('numberOfLinesNoAutoSelection'),
        numberOfNormalizedTokens: summarizeField('numberOfNormalizedTokens'),
        numberOfRTokens: summarizeField('numberOfRTokens')
    };
}
/**
 * Summarizes raw per-slice reduction records field by field, skipping
 * `undefined` entries (slices whose reduction could not be computed).
 * Module-private helper for {@link summarizeSlicerStats}.
 */
function summarizeReductions(reductions) {
    // Collect the defined values of one named field and summarize them.
    const summarizeField = (key) => (0, summarizer_1.summarizeMeasurement)(reductions.map(r => r[key]).filter(assert_1.isNotUndefined));
    return {
        numberOfLines: summarizeField('numberOfLines'),
        numberOfLinesNoAutoSelection: summarizeField('numberOfLinesNoAutoSelection'),
        numberOfCharacters: summarizeField('numberOfCharacters'),
        numberOfNonWhitespaceCharacters: summarizeField('numberOfNonWhitespaceCharacters'),
        numberOfRTokens: summarizeField('numberOfRTokens'),
        numberOfNormalizedTokens: summarizeField('numberOfNormalizedTokens'),
        numberOfDataflowNodes: summarizeField('numberOfDataflowNodes')
    };
}
/**
 * Combines several already-summarized time-per-token records into one summary,
 * separately for the raw (R-token) and normalized (AST-token) variants.
 */
function summarizeSummarizedTimePerToken(times) {
    const raw = summarizeSummarizedMeasurement(times.map(t => t.raw));
    const normalized = summarizeSummarizedMeasurement(times.map(t => t.normalized));
    return { raw, normalized };
}
/**
 * Summarizes raw per-slice time-per-token records, separately for the raw
 * (R-token) and normalized (AST-token) variants.
 */
function summarizeTimePerToken(times) {
    const raw = (0, summarizer_1.summarizeMeasurement)(times.map(t => t.raw));
    const normalized = (0, summarizer_1.summarizeMeasurement)(times.map(t => t.normalized));
    return { raw, normalized };
}
//# sourceMappingURL=process.js.map