// @eagleoutice/flowr
// Static Dataflow Analyzer and Program Slicer for the R Programming Language
;
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.postProcess = postProcess;
const common_syntax_probability_1 = require("../../common-syntax-probability");
const path_1 = __importDefault(require("path"));
const fs_1 = __importDefault(require("fs"));
const used_functions_1 = require("./used-functions");
const summarizer_1 = require("../../../../util/summarizer");
const files_1 = require("../../../../util/files");
const time_1 = require("../../../../util/text/time");
const numbers_1 = require("../../../../util/numbers");
/**
 * Reads the recorded `all-calls` feature file below `featureRoot`, aggregates call
 * statistics per function name, and writes `function-calls.csv` plus one csv per
 * "important" function (parse, eval, ...) into `outputPath`.
 *
 * @param featureRoot - directory containing the `<AllCallsFileBase>.txt` dump
 * @param info        - map of file context -> per-file meta information (its `size` is the total file count)
 * @param config      - post-processing configuration (e.g. `projectSkip`)
 * @param outputPath  - directory that receives the generated csv files
 */
function retrieveFunctionCallInformation(featureRoot, info, config, outputPath) {
    // each number[][] contains a 'number[]' per file
    /**
     * maps fn-name (including namespace) to number of arguments and their location (the number of elements in the array give the number of total call)
     * we use tuples to reduce the memory!
     * A function that is defined within the file is _always_ decorated with the filename (as second array element)!
     */
    const functionsPerFile = new Map();
    // fix: the name list previously contained 'body' twice; `new Map` silently drops the
    // duplicate key, but both `createWriteStream` calls still ran, opening two streams on
    // the same `body.csv` and leaking the first one (never closed). Dedupe before opening.
    const importantFunctionNames = [...new Set([
        'parse',
        'eval',
        'deparse',
        'quote',
        'body',
        'formals',
        'environment',
        'new.env',
        'assign',
        'get',
        'setGeneric',
        'R6Class'
    ])];
    const importantFunctions = new Map(importantFunctionNames.map(name => [name, fs_1.default.createWriteStream(path_1.default.join(outputPath, `${name}.csv`))]));
    // csv header for every important-function file
    for (const [, value] of importantFunctions) {
        value.write('filepath,location,namespace,inspected by,classification,notes\n');
    }
    // we collect only `all-calls`
    (0, files_1.readLineByLineSync)(path_1.default.join(featureRoot, `${used_functions_1.AllCallsFileBase}.txt`), (line, lineNumber) => processNextLine(functionsPerFile, lineNumber, info, JSON.parse(String(line)), config, importantFunctions));
    // close every per-function stream before summarizing
    for (const [, value] of importantFunctions) {
        value.close();
    }
    importantFunctions.clear();
    console.log(` [${(0, time_1.date2string)(new Date())}] Used functions process completed, start to write out function info`);
    const fnOutStream = fs_1.default.createWriteStream(path_1.default.join(outputPath, 'function-calls.csv'));
    const prefixes = ['total', 'args', 'line-frac'];
    const others = prefixes.flatMap(summarizer_1.summarizedMeasurement2CsvHeader).join(',');
    fnOutStream.write(`function,unique-projects,unique-files,${others}\n`);
    for (const [key, [uniqueProjects, uniqueFiles, total, args, lineFrac]] of functionsPerFile.entries()) {
        const totalSum = (0, summarizer_1.summarizeMeasurement)(total.flat(), info.size);
        const argsSum = (0, summarizer_1.summarizeMeasurement)(args.flat(), info.size);
        const lineFracSum = (0, summarizer_1.summarizeMeasurement)(lineFrac.flat());
        // we write in csv style :), we escape the key in case it contains commas (with filenames) etc.
        fnOutStream.write(`${JSON.stringify(key ?? 'unknown')},${uniqueProjects.size},${uniqueFiles.size},${(0, summarizer_1.summarizedMeasurement2Csv)(totalSum)},${(0, summarizer_1.summarizedMeasurement2Csv)(argsSum)},${(0, summarizer_1.summarizedMeasurement2Csv)(lineFracSum)}\n`);
    }
    fnOutStream.close();
}
/**
 * Writes the aggregated call meta measurements to `function-calls-meta.csv`, one
 * row per kind (average-call, nested-calls, deepest-nesting, empty-args, unnamed-calls).
 *
 * @param outputPath - directory that receives the csv file
 * @param data       - object with one `number[][]` per measurement kind
 */
function writeFunctionCallsMetaInformationToCsv(outputPath, data) {
    const out = fs_1.default.createWriteStream(path_1.default.join(outputPath, 'function-calls-meta.csv'));
    // fix: the header previously advertised 'unique-projects,unique-files' columns that
    // no data row below writes, producing a misaligned csv — it now matches the rows
    out.write(`kind,${(0, summarizer_1.summarizedMeasurement2CsvHeader)()}\n`);
    out.write(`average-call,${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(data.averageCall.flat()))}\n`);
    out.write(`nested-calls,${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(data.nestedCalls.flat()))}\n`);
    out.write(`deepest-nesting,${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(data.deepestNesting.flat()))}\n`);
    out.write(`empty-args,${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(data.emptyArgs.flat()))}\n`);
    out.write(`unnamed-calls,${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(data.unnamedCalls.flat()))}\n`);
    out.close();
}
/**
 * Aggregates per-file call meta information (call counts, nesting, argument type
 * counters) across all files in `info`, writes the overall summary csv and one
 * `function-calls-arg-<n>.csv` per argument position into `outputPath`.
 *
 * @param info       - map of file context -> per-file meta information (reads `usedFunctions`)
 * @param outputPath - directory that receives the generated csv files
 */
function retrieveFunctionCallMetaInformation(info, outputPath) {
    // accumulator: one number[][] per measurement kind; `args` is indexed by argument position
    const collected = {
        averageCall: [],
        nestedCalls: [],
        deepestNesting: [],
        emptyArgs: [],
        unnamedCalls: [],
        args: []
    };
    for (const fileMeta of info.values()) {
        const used = fileMeta.usedFunctions;
        collected.averageCall.push([used.allFunctionCalls]);
        collected.nestedCalls.push([used.nestedFunctionCalls]);
        collected.deepestNesting.push([used.deepestNesting]);
        collected.emptyArgs.push([(0, numbers_1.bigint2number)(used.args[0])]);
        collected.unnamedCalls.push([used.unnamedCalls]);
        for (const [rawIndex, counts] of Object.entries(used.args)) {
            const argIndex = Number(rawIndex);
            if (argIndex === 0) {
                // position 0 is the empty-args counter, already consumed above
                continue;
            }
            let slot = collected.args[argIndex];
            if (slot === undefined) {
                slot = (0, common_syntax_probability_1.emptyCommonSyntaxTypeCounts)(() => []);
                collected.args[argIndex] = slot;
            }
            (0, common_syntax_probability_1.appendCommonSyntaxTypeCounter)(slot, counts);
        }
    }
    console.log(` [${(0, time_1.date2string)(new Date())}] Used functions metadata reading completed, summarizing and writing to file`);
    writeFunctionCallsMetaInformationToCsv(outputPath, collected);
    for (const [index, argCounts] of collected.args.entries()) {
        if (!argCounts) {
            // we treat the first/0-argument entry separate for legacy reasons
            continue;
        }
        const out = fs_1.default.createWriteStream(path_1.default.join(outputPath, `function-calls-arg-${index}.csv`));
        out.write(`kind,name,${(0, summarizer_1.summarizedMeasurement2CsvHeader)()}\n`);
        for (const [kind, measurements] of Object.entries(argCounts)) {
            if (Array.isArray(measurements)) {
                // flat counter: single row without a name column
                out.write(`${JSON.stringify(kind)},"",${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(measurements.flat()))}\n`);
            }
            else {
                // nested counter: one row per inner name
                for (const [innerName, innerValues] of Object.entries(measurements)) {
                    out.write(`${JSON.stringify(kind)},${JSON.stringify(innerName)},${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(innerValues.flat()))}\n`);
                }
            }
        }
        out.close();
    }
}
/**
 * Entry point of the used-functions post-processing: first streams the recorded
 * calls into per-function csv files, then summarizes the per-file metadata.
 *
 * Note: the summary does not contain a 0 for each function that is _not_ called by a file. Hence, the minimum can not be 0 (division for mean etc. will still be performed on total file count)
 */
function postProcess(featureRoot, info, outputPath, config) {
    // helper producing the log timestamp in the shared format
    const timestamp = () => (0, time_1.date2string)(new Date());
    retrieveFunctionCallInformation(featureRoot, info, config, outputPath);
    console.log(` [${timestamp()}] Used functions reading completed, summarizing info...`);
    retrieveFunctionCallMetaInformation(info, outputPath);
}
/**
 * Processes one line of the `all-calls` feature dump: groups the hits of a single
 * file by (namespaced) function name, mirrors calls to "important" functions into
 * their dedicated csv streams, and merges the per-file groups into `data`.
 *
 * @param data       - global accumulator: key -> [projects, files, totals, args, line fractions]
 * @param lineNumber - 1-based line index (used for progress logging)
 * @param info       - map of file context -> per-file meta information (reads `stats.lines`)
 * @param line       - parsed tuple `[hits, context]` for one file
 * @param config     - configuration (reads `projectSkip` to locate the project name in the path)
 * @param importants - map of important function name -> open csv write stream
 */
function processNextLine(data, lineNumber, info, line, config, importants) {
    if (lineNumber % 2_500 === 0) {
        console.log(` [${(0, time_1.date2string)(new Date())}] Used functions processed ${lineNumber} lines`);
    }
    const [hits, context] = line;
    // group hits by fullname
    const groupedByName = new Map();
    for (const [name, loc, args, ns, known] of hits) {
        const importantSink = name && importants.get(name);
        if (importantSink) {
            importantSink.write(`${JSON.stringify(context)},${loc?.[0] ?? '?'}:${loc?.[1] ?? '?'},${ns ?? '""'},,,\n`);
        }
        const fullname = ns && ns !== '' ? `${ns}::${name ?? ''}` : name;
        // functions defined in the file itself (known === 1) get the file context appended
        const key = (fullname ?? '') + (known === 1 ? '-' + (context ?? '') : '');
        const totalLines = info.get(context ?? '')?.stats.lines[0].length;
        let bucket = groupedByName.get(key);
        if (bucket === undefined) {
            bucket = [new Set(), new Set(), [], [], []];
            groupedByName.set(key, bucket);
        }
        // the project name is the configured path component of the file context
        const projectName = context?.split(path_1.default.sep)[config.projectSkip];
        bucket[0].add(projectName ?? '');
        bucket[1].add(context ?? '');
        bucket[2].push(1);
        bucket[3].push(args);
        if (loc && totalLines) {
            // we reduce by 1 to get flat 0% if it is the first line
            bucket[4].push(totalLines === 1 ? 1 : (loc[0] - 1) / (totalLines - 1));
        }
    }
    // merge the per-file groups into the global accumulator
    for (const [key, grouped] of groupedByName.entries()) {
        let target = data.get(key);
        if (target === undefined) {
            // an amazing empty structure :D
            target = [new Set(), new Set(), [], [], []];
            data.set(key, target);
        }
        for (const project of grouped[0]) {
            target[0].add(project);
        }
        for (const file of grouped[1]) {
            target[1].add(file);
        }
        // for total, we only need the number of elements as it will always be one :D
        target[2].push([grouped[2].length]);
        target[3].push(grouped[3]);
        target[4].push(grouped[4]);
    }
}
//# sourceMappingURL=post-process.js.map