UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

176 lines 9.33 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.postProcess = postProcess;
const common_syntax_probability_1 = require("../../common-syntax-probability");
const path_1 = __importDefault(require("path"));
const fs_1 = __importDefault(require("fs"));
const used_functions_1 = require("./used-functions");
const summarizer_1 = require("../../../../util/summarizer");
const files_1 = require("../../../../util/files");
const time_1 = require("../../../../util/text/time");
const numbers_1 = require("../../../../util/numbers");

/**
 * Functions whose every call is additionally written to a dedicated `<name>.csv`
 * so that it can be inspected and classified by hand.
 * Every name must be listed exactly once: one write stream is opened per entry,
 * and a duplicate would open a second stream on the same file that is never closed.
 */
const ImportantFunctionNames = [
    'parse', 'eval', 'deparse', 'quote', 'body', 'formals', 'environment',
    'new.env', 'assign', 'get', 'setGeneric', 'R6Class'
];

/** Prints `message` to the console, prefixed with the current timestamp. */
function logProgress(message) {
    console.log(` [${(0, time_1.date2string)(new Date())}] ${message}`);
}

/**
 * Flattens the per-file measurements, summarizes them and renders the summary as CSV columns.
 *
 * @param perFile    - array holding one array of measurements per file
 * @param totalCount - optional second argument handed to `summarizeMeasurement`
 *                     (presumably the total number of files to divide by; confirm against the summarizer)
 */
function summaryAsCsv(perFile, totalCount) {
    return (0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(perFile.flat(), totalCount));
}

/** Creates a fresh `[uniqueProjects, uniqueFiles, total, args, lineFrac]` tuple. */
function emptyFunctionRecord() {
    return [new Set(), new Set(), [], [], []];
}

/**
 * Reads the collected `all-calls` file line by line, aggregates the calls per function
 * and writes `function-calls.csv` as well as one CSV per important function to `outputPath`.
 *
 * @param featureRoot - directory that contains the `all-calls` text file
 * @param info        - map from file path to the meta information of that file
 * @param config      - configuration, only `projectSkip` is read (see `processNextLine`)
 * @param outputPath  - directory to write the CSV files to
 */
function retrieveFunctionCallInformation(featureRoot, info, config, outputPath) {
    /*
     * Maps the function name (including the namespace) to a tuple
     * `[uniqueProjects, uniqueFiles, total, args, lineFrac]`; the last three are
     * `number[][]` holding one `number[]` per file.
     * We use tuples to reduce the memory!
     * A function that is defined within the file is _always_ decorated with the filename.
     */
    const functionsPerFile = new Map();
    const importantFunctions = new Map();
    try {
        for (const name of ImportantFunctionNames) {
            importantFunctions.set(name, fs_1.default.createWriteStream(path_1.default.join(outputPath, `${name}.csv`)));
        }
        for (const stream of importantFunctions.values()) {
            stream.write('filepath,location,namespace,inspected by,classification,notes\n');
        }
        // we collect only `all-calls`
        (0, files_1.readLineByLineSync)(
            path_1.default.join(featureRoot, `${used_functions_1.AllCallsFileBase}.txt`),
            (line, lineNumber) => processNextLine(functionsPerFile, lineNumber, info, JSON.parse(String(line)), config, importantFunctions)
        );
    }
    finally {
        // close the streams even if reading or parsing a line failed
        for (const stream of importantFunctions.values()) {
            stream.close();
        }
        importantFunctions.clear();
    }
    logProgress('Used functions process completed, start to write out function info');
    const fnOutStream = fs_1.default.createWriteStream(path_1.default.join(outputPath, 'function-calls.csv'));
    try {
        const prefixes = ['total', 'args', 'line-frac'];
        const others = prefixes.flatMap(summarizer_1.summarizedMeasurement2CsvHeader).join(',');
        fnOutStream.write(`function,unique-projects,unique-files,${others}\n`);
        for (const [key, [uniqueProjects, uniqueFiles, total, args, lineFrac]] of functionsPerFile.entries()) {
            const totalCsv = summaryAsCsv(total, info.size);
            const argsCsv = summaryAsCsv(args, info.size);
            const lineFracCsv = summaryAsCsv(lineFrac);
            // we write in csv style :), we escape the key in case it contains commas (with filenames) etc.
            fnOutStream.write(`${JSON.stringify(key ?? 'unknown')},${uniqueProjects.size},${uniqueFiles.size},${totalCsv},${argsCsv},${lineFracCsv}\n`);
        }
    }
    finally {
        fnOutStream.close();
    }
}

/**
 * Writes `function-calls-meta.csv` with one summarized row per kind of meta information.
 * The header only names the columns the rows actually contain (the kind followed by the
 * summarized measurement).
 *
 * @param outputPath - directory to write the CSV file to
 * @param data       - collected meta information, see `retrieveFunctionCallMetaInformation`
 */
function writeFunctionCallsMetaInformationToCsv(outputPath, data) {
    const rows = [
        ['average-call', data.averageCall],
        ['nested-calls', data.nestedCalls],
        ['deepest-nesting', data.deepestNesting],
        ['empty-args', data.emptyArgs],
        ['unnamed-calls', data.unnamedCalls]
    ];
    const out = fs_1.default.createWriteStream(path_1.default.join(outputPath, 'function-calls-meta.csv'));
    try {
        out.write(`kind,${(0, summarizer_1.summarizedMeasurement2CsvHeader)()}\n`);
        for (const [kind, perFile] of rows) {
            out.write(`${kind},${summaryAsCsv(perFile)}\n`);
        }
    }
    finally {
        out.close();
    }
}

/**
 * Collects the per-file meta information on function calls from `info`, writes the overall
 * summary to `function-calls-meta.csv` and one `function-calls-arg-<index>.csv` for every
 * argument index other than 0.
 *
 * @param info       - map from file path to the meta information of that file (`usedFunctions` is read)
 * @param outputPath - directory to write the CSV files to
 */
function retrieveFunctionCallMetaInformation(info, outputPath) {
    const data = {
        averageCall: [],
        nestedCalls: [],
        deepestNesting: [],
        emptyArgs: [],
        unnamedCalls: [],
        args: []
    };
    for (const meta of info.values()) {
        const us = meta.usedFunctions;
        data.averageCall.push([us.allFunctionCalls]);
        data.nestedCalls.push([us.nestedFunctionCalls]);
        data.deepestNesting.push([us.deepestNesting]);
        data.emptyArgs.push([(0, numbers_1.bigint2number)(us.args[0])]);
        data.unnamedCalls.push([us.unnamedCalls]);
        for (const [position, counts] of Object.entries(us.args)) {
            const index = Number(position);
            if (index === 0) {
                // index 0 is already covered by `emptyArgs` above
                continue;
            }
            let collected = data.args[index];
            if (!collected) {
                collected = (0, common_syntax_probability_1.emptyCommonSyntaxTypeCounts)(() => []);
                data.args[index] = collected;
            }
            (0, common_syntax_probability_1.appendCommonSyntaxTypeCounter)(collected, counts);
        }
    }
    logProgress('Used functions metadata reading completed, summarizing and writing to file');
    writeFunctionCallsMetaInformationToCsv(outputPath, data);
    for (const [index, arg] of data.args.entries()) {
        if (!arg) {
            // we treat the first/0-argument entry separate for legacy reasons
            continue;
        }
        const out = fs_1.default.createWriteStream(path_1.default.join(outputPath, `function-calls-arg-${index}.csv`));
        try {
            out.write(`kind,name,${(0, summarizer_1.summarizedMeasurement2CsvHeader)()}\n`);
            for (const [name, vals] of Object.entries(arg)) {
                if (Array.isArray(vals)) {
                    out.write(`${JSON.stringify(name)},"",${summaryAsCsv(vals)}\n`);
                    continue;
                }
                // nested record: one row per contained key
                for (const [keyName, keyValue] of Object.entries(vals)) {
                    out.write(`${JSON.stringify(name)},${JSON.stringify(keyName)},${summaryAsCsv(keyValue)}\n`);
                }
            }
        }
        finally {
            out.close();
        }
    }
}

/**
 * Post-processes the collected used-functions feature: first the individual calls, then the
 * per-file meta information. All results are written as CSV files to `outputPath`.
 *
 * Note: the summary does not contain a 0 for each function that is _not_ called by a file.
 * Hence, the minimum can not be 0 (division for mean etc. will still be performed on total file count)
 *
 * @param featureRoot - directory that contains the `all-calls` text file
 * @param info        - map from file path to the meta information of that file
 * @param outputPath  - directory to write the CSV files to
 * @param config      - configuration, only `projectSkip` is read
 */
function postProcess(featureRoot, info, outputPath, config) {
    retrieveFunctionCallInformation(featureRoot, info, config, outputPath);
    logProgress('Used functions reading completed, summarizing info...');
    retrieveFunctionCallMetaInformation(info, outputPath);
}

/**
 * Merges the calls recorded in a single line of the `all-calls` file into `data`.
 *
 * @param data       - accumulator, maps the function key to `[uniqueProjects, uniqueFiles, total, args, lineFrac]`
 * @param lineNumber - number of the processed line, only used for progress output
 * @param info       - map from file path to the meta information of that file
 * @param line       - parsed line `[hits, context]`; each hit is `[name, loc, args, ns, known]`
 *                     and `context` is used as the path of the file containing the calls
 * @param config     - `projectSkip` selects the path component of `context` that is used as project name
 * @param importants - map from important function name to the write stream of its CSV file
 */
function processNextLine(data, lineNumber, info, line, config, importants) {
    if (lineNumber % 2_500 === 0) {
        logProgress(`Used functions processed ${lineNumber} lines`);
    }
    const [hits, context] = line;
    // group hits by fullname
    const groupedByFunctionName = new Map();
    for (const [name, loc, args, ns, known] of hits) {
        const importantWrite = name && importants.get(name);
        if (importantWrite) {
            importantWrite.write(`${JSON.stringify(context)},${loc?.[0] ?? '?'}:${loc?.[1] ?? '?'},${ns ?? '""'},,,\n`);
        }
        const fullname = ns && ns !== '' ? `${ns}::${name ?? ''}` : name;
        // a `known` value of 1 decorates the key with the file it was found in
        const key = `${fullname ?? ''}${known === 1 ? `-${context ?? ''}` : ''}`;
        // presumably the number of lines of the file; verify against the producer of `stats.lines`
        const numberOfLines = info.get(context ?? '')?.stats.lines[0].length;
        let record = groupedByFunctionName.get(key);
        if (!record) {
            record = emptyFunctionRecord();
            groupedByFunctionName.set(key, record);
        }
        // we retrieve the first component of the path
        const projectName = context?.split(path_1.default.sep)[config.projectSkip];
        record[0].add(projectName ?? '');
        record[1].add(context ?? '');
        record[2].push(1);
        record[3].push(args);
        if (loc && numberOfLines) {
            // we reduce by 1 to get flat 0% if it is the first line
            record[4].push(numberOfLines === 1 ? 1 : (loc[0] - 1) / (numberOfLines - 1));
        }
    }
    for (const [key, grouped] of groupedByFunctionName.entries()) {
        let record = data.get(key);
        if (!record) {
            record = emptyFunctionRecord();
            data.set(key, record);
        }
        for (const project of grouped[0]) {
            record[0].add(project);
        }
        for (const file of grouped[1]) {
            record[1].add(file);
        }
        // for total, we only need the number of elements as it will always be one :D
        record[2].push([grouped[2].length]);
        record[3].push(grouped[3]);
        record[4].push(grouped[4]);
    }
}
//# sourceMappingURL=post-process.js.map