UNPKG

@eagleoutice/flowr

Version:

Static Dataflow Analyzer and Program Slicer for the R Programming Language

141 lines 6.11 kB
"use strict"; var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.StatisticsSummarizer = exports.statisticsFileNameRegex = void 0; const tar_1 = require("tar"); const fs_1 = __importDefault(require("fs")); const path_1 = __importDefault(require("path")); const process_1 = require("./first-phase/process"); const process_2 = require("./second-phase/process"); const summarizer_1 = require("../../util/summarizer"); const strings_1 = require("../../util/strings"); const files_1 = require("../../util/files"); const time_1 = require("../../util/time"); const log_1 = require("../../util/log"); exports.statisticsFileNameRegex = /.*--.*\.tar\.gz$/; /** * The returned map contains the full path as key, mapping it to the complete contents. */ async function retrieveAllFilesInArchive(f) { const filenames = new Map(); const promises = []; (0, tar_1.list)({ file: f, onentry: entry => { if (entry.type === 'File') { promises.push(entry.concat().then(content => { filenames.set(entry.path, content.toString()); })); } }, sync: true }); await Promise.all(promises); return filenames; } function identifyCommonPrefix(files) { return (0, strings_1.longestCommonPrefix)([...files.keys()]); } /** returns the target path */ async function extractArchive(f) { const files = await retrieveAllFilesInArchive(f); const commonRoot = identifyCommonPrefix(files); // post process until we find the '<filename>.(r|R)' suffix. otherwise, if there are no features and only the meta folder, the meta folder will be removed, resulting in a write // to the toplevel! const fname = path_1.default.basename(f).replace(/\.tar\.gz$/, ''); const findIndex = commonRoot.indexOf(fname); const commonPart = findIndex < 0 ? commonRoot.length : findIndex + fname.length; // transform all map keys by removing the common root const transformed = new Map(); for (const [key, value] of files.entries()) { transformed.set(key.slice(commonPart), value); } return transformed; } // due to a redefinition after the initial statistic extraction, we extract the type from the remaining path :D // original: /^([^-]*)---?(.+)\.tar.gz/ const filePrefixRegex = /^[^-]*---?(?<fullname>([^/]+)\/(?<pathtest>.+))\.tar\.gz$/; const testRegex = /.*test[-_]?(s|that|)\//i; /** if it starts with example-, this will return `'example'`, etc. if it starts with '--' this will return `undefined` */ function identifyExtractionType(path) { const match = filePrefixRegex.exec(path.replace(///g, '/')); if (match === null || match?.groups === undefined) { return undefined; } // recover const originalFile = match.groups.fullname; let folder; if (testRegex.test(match.groups.pathtest)) { folder = 'test'; } else if (match.groups.pathtest.includes('example')) { folder = 'example'; } else { folder = 'default'; } return { folder, originalFile }; } class StatisticsSummarizer extends summarizer_1.Summarizer { constructor(config) { super(config); } removeIfExists(path) { if (path && fs_1.default.existsSync(path)) { this.log(`Removing existing ${path}`); try { fs_1.default.rmSync(path, { recursive: true, force: true }); } catch { log_1.log.error('failure in cleanup'); } } } /** * The preparation phase essentially merges all files into one by just attaching lines together! */ async preparationPhase(useTypeClassification) { this.removeIfExists(this.config.intermediateOutputPath); fs_1.default.mkdirSync(this.config.intermediateOutputPath, { recursive: true }); let count = 0; const migrator = new process_1.FileMigrator(); for await (const f of (0, files_1.getAllFiles)(this.config.inputPath, /\.tar.gz$/)) { this.log(`[${count++}, ${(0, time_1.date2string)()}] processing file ${f} (to ${this.config.intermediateOutputPath})`); let target; try { target = await extractArchive(f); this.log(' Collected!'); } catch (e) { this.log(` Failed to extract ${f}, skipping...`); this.log(' Error: ' + JSON.stringify(e)); continue; } this.log(' Migrating files...'); const extracted = identifyExtractionType(path_1.default.basename(f)); await migrator.migrate(target, path_1.default.join(this.config.intermediateOutputPath, useTypeClassification ? extracted?.folder ?? 'default' : 'uncategorized'), extracted?.originalFile); this.log(' Done! (Cleanup...)'); } migrator.finish(); this.log(`Found ${count} files to summarize`); return Promise.resolve(); } async summarizePhase() { // detect all subfolders in the current folder (default, test...) for each: concat. this.removeIfExists(this.config.outputPath); fs_1.default.mkdirSync(this.config.outputPath, { recursive: true }); const folders = fs_1.default.readdirSync(this.config.intermediateOutputPath, { recursive: false }); for (const folder of folders) { const folderStr = String(folder); const output = path_1.default.join(this.config.outputPath, folderStr); const input = path_1.default.join(this.config.intermediateOutputPath, folderStr); this.log(`Summarizing for ${input} (target: ${output})`); (0, process_2.postProcessFeatureFolder)(this.log, input, this.config, output); } return Promise.resolve(undefined); } } exports.StatisticsSummarizer = StatisticsSummarizer; //# sourceMappingURL=summarizer.js.map