@eagleoutice/flowr
Version:
Static Dataflow Analyzer and Program Slicer for the R Programming Language
141 lines • 6.11 kB
JavaScript
;
var __importDefault = (this && this.__importDefault) || function (mod) {
return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.StatisticsSummarizer = exports.statisticsFileNameRegex = void 0;
const tar_1 = require("tar");
const fs_1 = __importDefault(require("fs"));
const path_1 = __importDefault(require("path"));
const process_1 = require("./first-phase/process");
const process_2 = require("./second-phase/process");
const summarizer_1 = require("../../util/summarizer");
const strings_1 = require("../../util/strings");
const files_1 = require("../../util/files");
const time_1 = require("../../util/time");
const log_1 = require("../../util/log");
exports.statisticsFileNameRegex = /.*--.*\.tar\.gz$/;
/**
 * Lists the archive `f` and collects the contents of every entry of type `'File'`.
 *
 * @param f - path of the archive to read
 * @returns a map from each entry's path inside the archive to its complete contents as a string
 */
async function retrieveAllFilesInArchive(f) {
    const contentsByPath = new Map();
    const pendingReads = [];
    // invoked by the (synchronous) listing for each entry; only regular files are read
    const collectEntry = entry => {
        if (entry.type !== 'File') {
            return;
        }
        const read = entry.concat().then(content => {
            contentsByPath.set(entry.path, content.toString());
        });
        pendingReads.push(read);
    };
    (0, tar_1.list)({
        file: f,
        onentry: collectEntry,
        sync: true
    });
    // the listing itself is synchronous, but the entry bodies are delivered via promises
    await Promise.all(pendingReads);
    return contentsByPath;
}
/** Returns the longest common prefix shared by all keys of the given map. */
function identifyCommonPrefix(files) {
    const allPaths = Array.from(files.keys());
    return (0, strings_1.longestCommonPrefix)(allPaths);
}
/**
 * Reads all files of the archive `f` and returns them as a map from shortened path to contents.
 *
 * Each path loses a leading prefix: if the common prefix of all paths contains the archive's
 * base name (without `.tar.gz`), everything up to and including that name is cut off;
 * otherwise the whole common prefix is cut off.
 */
async function extractArchive(f) {
    const files = await retrieveAllFilesInArchive(f);
    const commonRoot = identifyCommonPrefix(files);
    // cut only up to the '<filename>.(r|R)' part: if there are no features and only the meta folder,
    // the common prefix would swallow the meta folder too, resulting in a write to the toplevel!
    const archiveName = path_1.default.basename(f).replace(/\.tar\.gz$/, '');
    const nameStart = commonRoot.indexOf(archiveName);
    let cutAt = commonRoot.length;
    if (nameStart >= 0) {
        cutAt = nameStart + archiveName.length;
    }
    // rebuild the map with the shared leading part removed from every key
    return new Map(Array.from(files.entries(), ([key, value]) => [key.slice(cutAt), value]));
}
// due to a redefinition after the initial statistic extraction, we extract the type from the remaining path :D
// original: /^([^-]*)---?(.+)\.tar.gz/
// `fullname` is everything between the `--`/`---` separator and the `.tar.gz` suffix (it must contain a `/`),
// `pathtest` is the part of `fullname` after its first `/`.
const filePrefixRegex = /^[^-]*---?(?<fullname>([^/]+)\/(?<pathtest>.+))\.tar\.gz$/;
// case-insensitive: `test`, optionally followed by `-` or `_`, then `s`, `that` or nothing, directly followed by `/`
const testRegex = /.*test[-_]?(s|that|)\//i;
/**
 * Classifies an archive by its file name.
 *
 * @param path - file name of the archive (the caller in this file passes the basename)
 * @returns `undefined` if the name does not match `filePrefixRegex`; otherwise
 *          `originalFile` (the `fullname` capture) and `folder`, which is `'test'` if the
 *          `pathtest` capture matches `testRegex`, `'example'` if it contains `example`,
 *          and `'default'` in every other case.
 */
function identifyExtractionType(path) {
    // NOTE(review): the character replaced here was unreadable in the previous revision (the pattern
    // read as a bare ASCII slash, which is not a valid regex literal). Presumably it is the fullwidth
    // solidus (U+FF0F) standing in for `/` inside archive file names - confirm against the producer.
    const normalized = path.replace(/\uFF0F/g, '/');
    const groups = filePrefixRegex.exec(normalized)?.groups;
    if (groups === undefined) {
        return undefined;
    }
    // recover
    const { fullname: originalFile, pathtest } = groups;
    let folder;
    if (testRegex.test(pathtest)) {
        folder = 'test';
    }
    else if (pathtest.includes('example')) {
        folder = 'example';
    }
    else {
        folder = 'default';
    }
    return { folder, originalFile };
}
/**
 * Summarizer working on the `.tar.gz` statistics archives found below `config.inputPath`.
 *
 * `preparationPhase` extracts the archives and migrates their files into
 * `config.intermediateOutputPath`; `summarizePhase` post-processes every folder found there
 * into `config.outputPath`.
 */
class StatisticsSummarizer extends summarizer_1.Summarizer {
    constructor(config) {
        super(config);
    }
    /**
     * Recursively removes `path` if it is set and exists.
     * A failing removal is reported via the logger and otherwise ignored (best effort).
     */
    removeIfExists(path) {
        if (!path || !fs_1.default.existsSync(path)) {
            return;
        }
        this.log(`Removing existing ${path}`);
        try {
            fs_1.default.rmSync(path, { recursive: true, force: true });
        }
        catch {
            log_1.log.error('failure in cleanup');
        }
    }
    /**
     * The preparation phase essentially merges all files into one by just attaching lines together!
     *
     * Clears and recreates the intermediate output folder, then extracts every archive below
     * `config.inputPath` and hands its files to a `FileMigrator`. Archives that cannot be
     * extracted are logged and skipped.
     *
     * @param useTypeClassification - if truthy, files are migrated into the folder identified from
     *        the archive name (`'default'` if none is identified), otherwise into `'uncategorized'`
     */
    async preparationPhase(useTypeClassification) {
        this.removeIfExists(this.config.intermediateOutputPath);
        fs_1.default.mkdirSync(this.config.intermediateOutputPath, { recursive: true });
        let count = 0;
        const migrator = new process_1.FileMigrator();
        // both dots are escaped so that only a literal `.tar.gz` suffix is accepted
        for await (const f of (0, files_1.getAllFiles)(this.config.inputPath, /\.tar\.gz$/)) {
            this.log(`[${count++}, ${(0, time_1.date2string)()}] processing file ${f} (to ${this.config.intermediateOutputPath})`);
            let target;
            try {
                target = await extractArchive(f);
                this.log(' Collected!');
            }
            catch (e) {
                this.log(` Failed to extract ${f}, skipping...`);
                // JSON.stringify drops the non-enumerable `message` of Error objects (yielding `{}`),
                // so report the message for errors and only serialize other thrown values
                const reason = e instanceof Error ? e.message : JSON.stringify(e);
                this.log(' Error: ' + reason);
                continue;
            }
            this.log(' Migrating files...');
            const extracted = identifyExtractionType(path_1.default.basename(f));
            const folder = useTypeClassification ? extracted?.folder ?? 'default' : 'uncategorized';
            await migrator.migrate(target, path_1.default.join(this.config.intermediateOutputPath, folder), extracted?.originalFile);
            this.log(' Done! (Cleanup...)');
        }
        migrator.finish();
        this.log(`Found ${count} files to summarize`);
    }
    /**
     * Clears and recreates `config.outputPath`, then post-processes every entry found directly in
     * `config.intermediateOutputPath` (default, test, ...) into an equally named entry of the output folder.
     */
    async summarizePhase() {
        // detect all subfolders in the current folder (default, test...) for each: concat.
        this.removeIfExists(this.config.outputPath);
        fs_1.default.mkdirSync(this.config.outputPath, { recursive: true });
        const folders = fs_1.default.readdirSync(this.config.intermediateOutputPath, { recursive: false });
        for (const folder of folders) {
            const folderStr = String(folder);
            const output = path_1.default.join(this.config.outputPath, folderStr);
            const input = path_1.default.join(this.config.intermediateOutputPath, folderStr);
            this.log(`Summarizing for ${input} (target: ${output})`);
            (0, process_2.postProcessFeatureFolder)(this.log, input, this.config, output);
        }
    }
}
exports.StatisticsSummarizer = StatisticsSummarizer;
//# sourceMappingURL=summarizer.js.map