/*
 * @eagleoutice/flowr — Static Dataflow Analyzer and Program Slicer for the R Programming Language
 * (compiled distribution file; original listing: 116 lines, 7.42 kB, JavaScript)
 */
;
// Interop helper (emitted by the TypeScript compiler): wraps a CommonJS
// module in `{ default: mod }` unless it is already an ES-module namespace.
// Reuses an existing helper on `this` when one was installed by another file.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        // already an ES module — hand it back untouched
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.postProcessFeatures = postProcessFeatures;
exports.postProcessFeatureFolder = postProcessFeatureFolder;
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const time_1 = require("../../../util/text/time");
const summarizer_1 = require("../../../util/summarizer");
const arrays_1 = require("../../../util/collections/arrays");
const files_1 = require("../../../util/files");
const assert_1 = require("../../../util/assert");
const feature_1 = require("../../features/feature");
/**
 * Run the per-feature post-processing step for every feature selected in the
 * summarizer configuration. Features without a `postProcess` handler are skipped.
 *
 * @param config - Summarizer configuration; `featuresToUse` selects the features
 * @param filepath - Root folder of the collected per-feature data
 * @param outputPath - Folder the post-processed results are written to
 * @param logger - Sink for progress messages
 * @param metaFeatureInformation - Per-file meta information passed through to each handler
 */
function postProcessFeatures(config, filepath, outputPath, logger, metaFeatureInformation) {
    for (const featureName of config.featuresToUse) {
        const feature = feature_1.ALL_FEATURES[featureName];
        if (!feature.postProcess) {
            logger(` Skipping post processing of ${featureName} as no post processing behavior is defined`);
            continue;
        }
        logger(` Post processing of ${featureName}...`);
        const inputDir = path_1.default.join(filepath, feature.name);
        const outputDir = path_1.default.join(outputPath, feature.name);
        if (!fs_1.default.existsSync(outputDir)) {
            fs_1.default.mkdirSync(outputDir, { recursive: true });
        }
        // reclaim memory between features when node was started with --expose-gc
        if (global.gc) {
            logger(` [${(0, time_1.date2string)(new Date())}] Running garbage collection (--expose-gc)`);
            global.gc();
        }
        feature.postProcess(inputDir, metaFeatureInformation, outputDir, config);
    }
}
/**
 * Write per-file and aggregate ("all") meta statistics to `<outputPath>/meta/stats.csv`.
 * Each CSV data row must stay aligned with the header written below — the column
 * order of the two `out.write` template strings is load-bearing.
 *
 * @param config - Summarizer configuration (currently unused in this function)
 * @param filepath - Root folder of the collected data (unused here; kept for signature parity with postProcessFeatures)
 * @param outputPath - Folder the `meta/stats.csv` file is written into
 * @param logger - Sink for progress messages (unused here)
 * @param metaFeatureInformation - Map of file name -> collected info; each entry's
 *   `stats` is read for parse counts, timings, line data and normalized-node counts
 */
function postProcessMeta(config, filepath, outputPath, logger, metaFeatureInformation) {
    // accumulators across all files, used for the trailing "all" summary row
    const fileStatisticsSummary = {
        successfulParsed: [],
        processingTimeMs: [],
        failedRequests: [],
        // min lengths of 1 etc. could come from different line endings
        lines: [],
        characters: [],
        numberOfNormalizedNodes: []
    };
    if (!fs_1.default.existsSync(path_1.default.join(outputPath, 'meta'))) {
        fs_1.default.mkdirSync(path_1.default.join(outputPath, 'meta'), { recursive: true });
    }
    const out = fs_1.default.createWriteStream(path_1.default.join(outputPath, 'meta', 'stats.csv'));
    // header row; the summarized measurements expand to several columns each
    out.write(`file,successfulParsed,${(0, summarizer_1.summarizedMeasurement2CsvHeader)('processing')},failedRequests,${(0, summarizer_1.summarizedMeasurement2CsvHeader)('line-length')},${(0, summarizer_1.summarizedMeasurement2CsvHeader)('lines')},${(0, summarizer_1.summarizedMeasurement2CsvHeader)('characters')},numberOfNormalizedNodes\n`);
    for (const [file, info] of metaFeatureInformation) {
        // we could retrieve these by summing later as well :thinking: however, this makes it more explicit
        // NOTE(review): `info.stats.lines[0]` appears to be an array of per-line lengths
        // for the file (summed here to a character count) — TODO confirm against the collector
        const characters = (0, arrays_1.arraySum)(info.stats.lines[0]);
        // columns: file, successfulParsed, processing summary, failedRequests,
        // line-length summary, lines summary (singleton: number of lines),
        // characters summary (singleton), numberOfNormalizedNodes
        out.write(`${JSON.stringify(file)},${info.stats.successfulParsed},${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(info.stats.processingTimeMs))},`
            + `${info.stats.failedRequests.length},${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(info.stats.lines[0]))},${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)([info.stats.lines[0].length]))},${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)([characters]))},${info.stats.numberOfNormalizedNodes[0]}\n`);
        fileStatisticsSummary.successfulParsed.push(info.stats.successfulParsed);
        fileStatisticsSummary.processingTimeMs.push(...info.stats.processingTimeMs);
        fileStatisticsSummary.failedRequests.push(info.stats.failedRequests.length);
        fileStatisticsSummary.lines.push(info.stats.lines[0]);
        fileStatisticsSummary.characters.push(characters);
        fileStatisticsSummary.numberOfNormalizedNodes.push(info.stats.numberOfNormalizedNodes[0]);
    }
    // aggregate row over every file, same column layout as the per-file rows
    out.write(`all,${(0, arrays_1.arraySum)(fileStatisticsSummary.successfulParsed)},${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(fileStatisticsSummary.processingTimeMs))},`
        + `${(0, arrays_1.arraySum)(fileStatisticsSummary.failedRequests)},${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(fileStatisticsSummary.lines.flat()))},${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(fileStatisticsSummary.lines.map(l => l.length)))},${(0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(fileStatisticsSummary.characters))},${(0, arrays_1.arraySum)(fileStatisticsSummary.numberOfNormalizedNodes)}\n`);
    // fs.WriteStream#close flushes and closes the underlying fd asynchronously
    out.close();
}
/**
 * Post process the collections in a given folder, retrieving the final summaries.
 *
 * Skips silently (with a log message) if the input folder does not exist, and
 * creates the output folder on demand.
 *
 * @param logger - The logger to use for outputs
 * @param filepath - Path to the root file of the data collection (contains all the archives)
 * @param config - Configuration of the summarizer
 * @param outputPath - The final outputPath to write the result to (may differ from the configured root folder)
 */
function postProcessFeatureFolder(logger, filepath, config, outputPath) {
    if (!fs_1.default.existsSync(filepath)) {
        logger(` Folder for ${filepath} does not exist, skipping post processing`);
        return;
    }
    if (!fs_1.default.existsSync(outputPath)) {
        fs_1.default.mkdirSync(outputPath, { recursive: true });
    }
    // collect the per-file meta information once, then feed it to both passes
    const featuresFile = path_1.default.join(filepath, 'meta', 'features.txt');
    const statsFile = path_1.default.join(filepath, 'meta', 'stats.txt');
    const metaFeatureInformation = extractMetaInformationFrom(logger, featuresFile, statsFile);
    postProcessFeatures(config, filepath, outputPath, logger, metaFeatureInformation);
    postProcessMeta(config, filepath, outputPath, logger, metaFeatureInformation);
}
/**
 * Build a map from file name to its collected meta information by merging two
 * newline-delimited JSON files: the features file seeds each entry, the stats
 * file attaches a `stats` property to the matching entry.
 *
 * @param logger - Sink for progress messages
 * @param metaFeaturesPath - NDJSON file of `{ file, content }` feature records
 * @param metaStatsPath - NDJSON file of `{ file, content }` statistics records
 * @returns Map of file name -> meta information (with `stats` merged in)
 */
function extractMetaInformationFrom(logger, metaFeaturesPath, metaStatsPath) {
    const storage = new Map();
    logger(` [${(0, time_1.date2string)(new Date())}] Collect feature statistics`);
    (0, files_1.readLineByLineSync)(metaFeaturesPath, (line, lineNumber) => {
        if (line.length > 0) {
            // periodic progress output for large inputs
            if (lineNumber % 2500 === 0) {
                logger(` [${(0, time_1.date2string)(new Date())}] ${lineNumber} meta feature lines processed`);
            }
            const entry = JSON.parse(line.toString());
            storage.set(entry.file, entry.content);
        }
    });
    logger(` [${(0, time_1.date2string)(new Date())}] Collect meta statistics`);
    (0, files_1.readLineByLineSync)(metaStatsPath, (line, lineNumber) => {
        if (line.length > 0) {
            if (lineNumber % 2500 === 0) {
                logger(` [${(0, time_1.date2string)(new Date())}] ${lineNumber} meta statistics lines processed`);
            }
            const entry = JSON.parse(line.toString());
            // every stats record must refer to a file already seen in the features file
            const target = storage.get(entry.file);
            (0, assert_1.guard)(target !== undefined, () => `Expected to find meta information for ${entry.file} in line ${lineNumber + 1} of ${metaFeaturesPath}`);
            target.stats = entry.content;
        }
    });
    logger(` [${(0, time_1.date2string)(new Date())}] Done collecting meta information`);
    return storage;
}
//# sourceMappingURL=process.js.map