UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

116 lines 7.42 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.postProcessFeatures = postProcessFeatures;
exports.postProcessFeatureFolder = postProcessFeatureFolder;
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const time_1 = require("../../../util/text/time");
const summarizer_1 = require("../../../util/summarizer");
const arrays_1 = require("../../../util/collections/arrays");
const files_1 = require("../../../util/files");
const assert_1 = require("../../../util/assert");
const feature_1 = require("../../features/feature");

/** A progress message is logged whenever the reported line number is a multiple of this value. */
const PROGRESS_INTERVAL = 2_500;

/**
 * Create `directory` (including missing parents) unless something already exists at that path.
 *
 * @param directory - Path of the directory to create
 */
function ensureDirectory(directory) {
    if (fs_1.default.existsSync(directory)) {
        return;
    }
    fs_1.default.mkdirSync(directory, { recursive: true });
}

/**
 * Format the current point in time with `date2string`, for use as a log prefix.
 */
function timestamp() {
    return (0, time_1.date2string)(new Date());
}

/**
 * Summarize the given measurements and render that summary as CSV cells.
 *
 * @param values - The raw measurements to summarize
 */
function summaryCsv(values) {
    return (0, summarizer_1.summarizedMeasurement2Csv)((0, summarizer_1.summarizeMeasurement)(values));
}

/**
 * Stringify every cell, join the cells with commas and terminate the line.
 *
 * @param cells - The cell values of one CSV line, in column order
 */
function csvLine(cells) {
    // stringify through a template so `undefined`/`null` render exactly as they would when interpolated
    return `${cells.map(cell => `${cell}`).join(',')}\n`;
}

/**
 * Read `sourcePath` line by line, skip empty lines, parse every other line as JSON and
 * hand the parsed value together with its line number to `handleEntry`.
 *
 * @param logger         - The logger to use for progress outputs
 * @param sourcePath     - Path of the file to read
 * @param progressSuffix - Text appended to the line number in progress messages
 * @param handleEntry    - Receives `(parsedLine, lineNumber)` for each non-empty line
 */
function forEachMetaLine(logger, sourcePath, progressSuffix, handleEntry) {
    (0, files_1.readLineByLineSync)(sourcePath, (line, lineNumber) => {
        if (line.length !== 0) {
            if (lineNumber % PROGRESS_INTERVAL === 0) {
                logger(` [${timestamp()}] ${lineNumber} ${progressSuffix}`);
            }
            handleEntry(JSON.parse(line.toString()), lineNumber);
        }
    });
}

/**
 * Run the `postProcess` hook of every feature listed in `config.featuresToUse`.
 * Features without such a hook are reported through the logger and skipped.
 *
 * @param config                 - Configuration of the summarizer (provides `featuresToUse`, forwarded to the hook)
 * @param filepath               - Root folder of the collected data; the feature name is appended to form the hook's input path
 * @param outputPath             - Root folder for the results; the feature name is appended to form the hook's output path
 * @param logger                 - The logger to use for outputs
 * @param metaFeatureInformation - Meta information forwarded to every hook
 */
function postProcessFeatures(config, filepath, outputPath, logger, metaFeatureInformation) {
    for (const featureName of config.featuresToUse) {
        const feature = feature_1.ALL_FEATURES[featureName];
        const sourceDirectory = path_1.default.join(filepath, feature.name);
        const resultDirectory = path_1.default.join(outputPath, feature.name);
        if (feature.postProcess) {
            logger(` Post processing of ${featureName}...`);
            ensureDirectory(resultDirectory);
            // `global.gc` is only defined when node was started with --expose-gc
            if (global.gc) {
                logger(` [${timestamp()}] Running garbage collection (--expose-gc)`);
                global.gc();
            }
            feature.postProcess(sourceDirectory, metaFeatureInformation, resultDirectory, config);
        }
        else {
            logger(` Skipping post processing of ${featureName} as no post processing behavior is defined`);
        }
    }
}

/**
 * Write `<outputPath>/meta/stats.csv`: a header, one line per entry of
 * `metaFeatureInformation`, and a closing `all` line aggregated over every entry.
 *
 * @param config                 - Configuration of the summarizer (not used by this function)
 * @param filepath               - Root folder of the collected data (not used by this function)
 * @param outputPath             - Root folder for the results; the csv is placed in its `meta` sub-folder
 * @param logger                 - The logger to use for outputs (not used by this function)
 * @param metaFeatureInformation - Iterable of `[file, info]` pairs, where `info.stats` holds the statistics of that file
 */
function postProcessMeta(config, filepath, outputPath, logger, metaFeatureInformation) {
    const successfulParsed = [];
    const processingTimeMs = [];
    const failedRequests = [];
    // min lengths of 1 etc. could come from different line endings
    const lines = [];
    const characterCounts = [];
    const numberOfNormalizedNodes = [];

    const metaDirectory = path_1.default.join(outputPath, 'meta');
    ensureDirectory(metaDirectory);
    const out = fs_1.default.createWriteStream(path_1.default.join(outputPath, 'meta', 'stats.csv'));

    out.write(csvLine([
        'file',
        'successfulParsed',
        (0, summarizer_1.summarizedMeasurement2CsvHeader)('processing'),
        'failedRequests',
        (0, summarizer_1.summarizedMeasurement2CsvHeader)('line-length'),
        (0, summarizer_1.summarizedMeasurement2CsvHeader)('lines'),
        (0, summarizer_1.summarizedMeasurement2CsvHeader)('characters'),
        'numberOfNormalizedNodes'
    ]));

    for (const [file, info] of metaFeatureInformation) {
        const { stats } = info;
        // we could retrieve these by summing later as well, however, this makes it more explicit
        const fileCharacters = (0, arrays_1.arraySum)(stats.lines[0]);
        out.write(csvLine([
            JSON.stringify(file),
            stats.successfulParsed,
            summaryCsv(stats.processingTimeMs),
            stats.failedRequests.length,
            summaryCsv(stats.lines[0]),
            summaryCsv([stats.lines[0].length]),
            summaryCsv([fileCharacters]),
            stats.numberOfNormalizedNodes[0]
        ]));

        successfulParsed.push(stats.successfulParsed);
        processingTimeMs.push(...stats.processingTimeMs);
        failedRequests.push(stats.failedRequests.length);
        lines.push(stats.lines[0]);
        characterCounts.push(fileCharacters);
        numberOfNormalizedNodes.push(stats.numberOfNormalizedNodes[0]);
    }

    out.write(csvLine([
        'all',
        (0, arrays_1.arraySum)(successfulParsed),
        summaryCsv(processingTimeMs),
        (0, arrays_1.arraySum)(failedRequests),
        summaryCsv(lines.flat()),
        summaryCsv(lines.map(({ length }) => length)),
        summaryCsv(characterCounts),
        (0, arrays_1.arraySum)(numberOfNormalizedNodes)
    ]));
    out.close();
}

/**
 * Entry point for post processing a data collection folder: loads the meta information,
 * runs the per-feature post processing and finally writes the meta statistics.
 * If `filepath` does not exist, this is logged and nothing else happens.
 *
 * @param logger     - The logger to use for outputs
 * @param filepath   - Path to the root of the data collection (its `meta` sub-folder is read)
 * @param config     - Configuration of the summarizer
 * @param outputPath - The final outputPath to write the result to (created if missing)
 */
function postProcessFeatureFolder(logger, filepath, config, outputPath) {
    if (!fs_1.default.existsSync(filepath)) {
        logger(` Folder for ${filepath} does not exist, skipping post processing`);
        return;
    }
    ensureDirectory(outputPath);
    const featuresFile = path_1.default.join(filepath, 'meta', 'features.txt');
    const statsFile = path_1.default.join(filepath, 'meta', 'stats.txt');
    const metaFeatureInformation = extractMetaInformationFrom(logger, featuresFile, statsFile);
    postProcessFeatures(config, filepath, outputPath, logger, metaFeatureInformation);
    postProcessMeta(config, filepath, outputPath, logger, metaFeatureInformation);
}

/**
 * Build a map from file to its meta information by reading two files of JSON lines.
 * Every line of `metaFeaturesPath` contributes `content` as the map value for `file`;
 * every line of `metaStatsPath` attaches its `content` as `stats` to the existing value.
 * A stats line whose `file` has no entry yet fails the guard.
 *
 * @param logger           - The logger to use for outputs
 * @param metaFeaturesPath - Path of the file holding the per-file feature information
 * @param metaStatsPath    - Path of the file holding the per-file statistics
 * @returns The populated map
 */
function extractMetaInformationFrom(logger, metaFeaturesPath, metaStatsPath) {
    const storage = new Map();

    logger(` [${timestamp()}] Collect feature statistics`);
    forEachMetaLine(logger, metaFeaturesPath, 'meta feature lines processed', meta => {
        storage.set(meta.file, meta.content);
    });

    logger(` [${timestamp()}] Collect meta statistics`);
    forEachMetaLine(logger, metaStatsPath, 'meta statistics lines processed', (meta, lineNumber) => {
        const existing = storage.get(meta.file);
        (0, assert_1.guard)(existing !== undefined, () => `Expected to find meta information for ${meta.file} in line ${lineNumber + 1} of ${metaFeaturesPath}`);
        existing.stats = meta.content;
    });

    logger(` [${timestamp()}] Done collecting meta information`);
    return storage;
}
//# sourceMappingURL=process.js.map