ayakashi
Version:
The next generation web scraping framework
419 lines (418 loc) • 16.9 kB
JavaScript
"use strict";
/**
 * Drives a generator function to completion and exposes the outcome as a promise.
 * An already-defined `this.__awaiter` is reused when one exists.
 *
 * @param {*} thisArg - `this` value the generator function is invoked with.
 * @param {Array} [_arguments] - Arguments for the generator function (defaults to none).
 * @param {Function} [P] - Promise constructor to use; falls back to the global `Promise`.
 * @param {Function} generator - Generator function whose yielded values are awaited.
 * @returns {Promise} Settles with the generator's return value, or rejects with
 *   whatever it throws.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Yielded values that are not already P instances get wrapped in one.
    const toPromise = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((settle) => {
            settle(value);
        });
    };
    return new P((resolve, reject) => {
        // Either finish with the generator's return value or wait on the yielded one.
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        // Feed a resolved value back into the generator.
        const onFulfilled = (value) => {
            try {
                advance(generator.next(value));
            }
            catch (err) {
                reject(err);
            }
        };
        // Re-throw a rejection inside the generator so its own try/catch can see it.
        const onRejected = (reason) => {
            try {
                advance(generator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        };
        generator = generator.apply(thisArg, _arguments || []);
        advance(generator.next());
    });
};
/**
 * Normalises a required module so that it can always be read through `.default`.
 * An already-defined `this.__importDefault` is reused when one exists.
 *
 * @param {*} mod - The value returned by `require`.
 * @returns {*} `mod` itself when it is flagged with `__esModule`, otherwise a
 *   wrapper object whose `default` property is `mod`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const yargs_1 = __importDefault(require("yargs"));
const prompts_1 = __importDefault(require("prompts"));
const runner_1 = require("../runner/runner");
const opLog_1 = require("../opLog/opLog");
const downloader_1 = require("../chromeDownloader/downloader");
const chromium_1 = require("../store/chromium");
const getDirectory_1 = require("./getDirectory");
const prepareStandard_1 = require("./prepareStandard");
const prepareSimple_1 = require("./prepareSimple");
const prepareFromJson_1 = require("./prepareFromJson");
const getName_1 = require("./scaffold/getName");
const generateProp_1 = require("./scaffold/generateProp");
const generateAction_1 = require("./scaffold/generateAction");
const generateExtractor_1 = require("./scaffold/generateExtractor");
const generatePreloader_1 = require("./scaffold/generatePreloader");
const generateScraper_1 = require("./scaffold/generateScraper");
const generateRenderlessScraper_1 = require("./scaffold/generateRenderlessScraper");
const generateApiScraper_1 = require("./scaffold/generateApiScraper");
const generateScript_1 = require("./scaffold/generateScript");
const generateProject_1 = require("./scaffold/generateProject");
const refreshUA_1 = require("./refreshUA");
const updateStealth_1 = require("./updateStealth");
const tsHelpers_1 = require("./tsHelpers");
const packageJson = require("../../package.json");
yargs_1.default
    // `run`: prepares a project (or a single scraper file) and passes it to the runner.
    //@ts-ignore
    .command("run [dir]", "Runs a project", (_argv) => {
    // Builder: declares the positional argument and every flag `run` accepts.
    yargs_1.default
        .positional("dir", {
        describe: "The root directory of a project or a scraper file when --simple mode is used",
        default: "."
    })
        .option("configFile", {
        describe: "Use an alternative configFile",
        alias: "c",
        default: ""
    })
        .option("jsonConfig", {
        describe: "Use a json string as config",
        alias: "jc"
    })
        .option("sessionKey", {
        describe: "Use a specific run session",
        default: "default",
        // The session key is always handed on as a string.
        coerce: (value) => String(value)
    })
        .option("simple", {
        type: "boolean",
        describe: "Run a single scraper"
    })
        .option("simpleRenderless", {
        type: "boolean",
        describe: "Run a single renderlessScraper"
    })
        .option("simpleApi", {
        type: "boolean",
        describe: "Run a single apiScraper"
    })
        .option("resume", {
        type: "boolean",
        describe: "Resume execution of a previous unfinished run"
    })
        .option("restartDisabledSteps", {
        type: "boolean",
        describe: "Will restart all steps that terminated due to an error. Only works when --resume is used"
    })
        .option("clean", {
        type: "boolean",
        describe: "Clear the previous run if it exists and start from the beginning"
    })
        .option("out", {
        describe: "Select the saving format when --simple mode is used",
        default: "stdout",
        choices: ["sqlite", "csv", "json", "stdout"]
    })
        .option("skipTsBuild", {
        type: "boolean",
        describe: "Skip automatic typescript compilation"
    })
        .epilogue("Learn more at https://ayakashi-io.github.io/docs/reference/cli-commands.html#run");
}, function (argv) {
    // Handler: picks a preparation strategy from the flags, then starts the run.
    return __awaiter(this, void 0, void 0, function* () {
        const opLog = opLog_1.getOpLog();
        opLog.info("Ayakashi version:", packageJson.version);
        // Single-file modes as [flag, scraper type]; the first flag that is set wins.
        const simpleModes = [
            ["simple", "scraper"],
            ["simpleRenderless", "renderlessScraper"],
            ["simpleApi", "apiScraper"]
        ];
        let prepared;
        let simpleMode;
        if (argv.jsonConfig) {
            // An inline json config takes precedence over every other mode.
            prepared = prepareFromJson_1.prepareFromJson(argv.dir, argv.jsonConfig);
        }
        else {
            simpleMode = simpleModes.find(([flag]) => argv[flag]);
            if (simpleMode) {
                prepared = prepareSimple_1.prepareSimple(argv.dir, argv.out, simpleMode[1]);
            }
            else {
                prepared = yield prepareStandard_1.prepareStandard(argv.dir, argv.configFile, argv.skipTsBuild || false);
            }
        }
        const config = prepared.config;
        const directory = prepared.directory;
        // Only the single-file modes provide a scraper of their own.
        const simpleScraper = simpleMode ? prepared.scraper : null;
        const runOptions = {
            resume: argv.resume || false,
            restartDisabledSteps: argv.restartDisabledSteps || false,
            clean: argv.clean || false,
            simpleScraper: simpleScraper,
            sessionKey: argv.sessionKey
        };
        // The run is started but not awaited; success and failure are reported from here.
        runner_1.run(directory, config, runOptions)
            .then(() => {
            opLog.info("Nothing more to do!");
        })
            .catch((err) => {
            opLog.error("Something went wrong", err);
            process.exit(1);
        });
    });
})
//@ts-ignore
.command("new [dir]", "Generates a new project", (_argv) => {
yargs_1.default
.positional("dir", {
describe: "Where to place the generated files",
default: "."
})
.option("project", {
type: "boolean",
describe: "Generate a new project"
})
.option("scraper", {
type: "boolean",
describe: "Generate a new scraper"
})
.option("renderlessScraper", {
type: "boolean",
describe: "Generate a new renderlessScraper"
})
.option("apiScraper", {
type: "boolean",
describe: "Generate a new apiScraper"
})
.option("script", {
type: "boolean",
describe: "Generate a new script"
})
.option("prop", {
type: "boolean",
describe: "Generate a new prop"
})
.option("action", {
type: "boolean",
describe: "Generate a new action"
})
.option("extractor", {
type: "boolean",
describe: "Generate a new extractor"
})
.option("preloader", {
type: "boolean",
describe: "Generate a new preloader"
})
.option("name", {
type: "string",
describe: "The name of the new scraper|renderlessScraper|script|prop|action|extractor|preloader"
})
.option("ts", {
type: "boolean",
describe: "Generate a typescript project"
})
.option("js", {
type: "boolean",
describe: "Generate a javascript project"
})
.conflicts("ts", "js")
.epilogue("Learn more at https://ayakashi-io.github.io/docs/reference/cli-commands.html#new");
//@ts-ignore
}, function (argv) {
return __awaiter(this, void 0, void 0, function* () {
//tslint:disable cyclomatic-complexity
const opLog = opLog_1.getOpLog();
if ((!argv.prop && !argv.project && !argv.action && !argv.extractor &&
!argv.preloader && !argv.scraper && !argv.renderlessScraper &&
!argv.apiScraper && !argv.script) || argv.project) {
let ts;
if (argv.ts === undefined && argv.js === undefined) {
const response = yield prompts_1.default({
type: "select",
name: "projectType",
message: "Do you want to generate a javascript or typescript project?",
choices: [{
title: "Javascript",
value: "js"
}, {
title: "Typescript",
value: "ts"
}],
instructions: false
});
if (!response.projectType) {
opLog.error("Select a project type to continue");
process.exit(1);
}
if (response.projectType === "ts") {
ts = true;
}
else {
ts = false;
}
}
else if (argv.ts) {
ts = true;
}
else {
ts = false;
}
if (argv.dir === ".") {
yield generateProject_1.generateProject(getDirectory_1.getDirectory(argv.dir), true, ts);
}
else {
yield generateProject_1.generateProject(getDirectory_1.getDirectory(argv.dir, false), false, ts);
}
}
else if (argv.prop) {
const name = yield getName_1.getName(argv.name, "prop");
let directory = getDirectory_1.getDirectory(argv.dir);
const ts = yield tsHelpers_1.isTypescriptProject(directory);
if (ts)
directory = yield tsHelpers_1.getTypescriptRoot(directory);
yield generateProp_1.generateProp(directory, name, ts);
}
else if (argv.action) {
const name = yield getName_1.getName(argv.name, "action");
let directory = getDirectory_1.getDirectory(argv.dir);
const ts = yield tsHelpers_1.isTypescriptProject(directory);
if (ts)
directory = yield tsHelpers_1.getTypescriptRoot(directory);
yield generateAction_1.generateAction(directory, name, ts);
}
else if (argv.extractor) {
const name = yield getName_1.getName(argv.name, "extractor");
let directory = getDirectory_1.getDirectory(argv.dir);
const ts = yield tsHelpers_1.isTypescriptProject(directory);
if (ts)
directory = yield tsHelpers_1.getTypescriptRoot(directory);
yield generateExtractor_1.generateExtractor(directory, name, ts);
}
else if (argv.preloader) {
const name = yield getName_1.getName(argv.name, "preloader");
let directory = getDirectory_1.getDirectory(argv.dir);
const ts = yield tsHelpers_1.isTypescriptProject(directory);
if (ts)
directory = yield tsHelpers_1.getTypescriptRoot(directory);
yield generatePreloader_1.generatePreloader(directory, name, ts);
}
else if (argv.scraper) {
const name = yield getName_1.getName(argv.name, "scraper");
let directory = getDirectory_1.getDirectory(argv.dir);
const ts = yield tsHelpers_1.isTypescriptProject(directory);
if (ts)
directory = yield tsHelpers_1.getTypescriptRoot(directory);
yield generateScraper_1.generateScraper(directory, name, ts);
}
else if (argv.renderlessScraper) {
const name = yield getName_1.getName(argv.name, "renderlessScraper");
let directory = getDirectory_1.getDirectory(argv.dir);
const ts = yield tsHelpers_1.isTypescriptProject(directory);
if (ts)
directory = yield tsHelpers_1.getTypescriptRoot(directory);
yield generateRenderlessScraper_1.generateRenderlessScraper(directory, name, ts);
}
else if (argv.apiScraper) {
const name = yield getName_1.getName(argv.name, "apiScraper");
let directory = getDirectory_1.getDirectory(argv.dir);
const ts = yield tsHelpers_1.isTypescriptProject(directory);
if (ts)
directory = yield tsHelpers_1.getTypescriptRoot(directory);
yield generateApiScraper_1.generateApiScraper(directory, name, ts);
}
else if (argv.script) {
const name = yield getName_1.getName(argv.name, "script");
let directory = getDirectory_1.getDirectory(argv.dir);
const ts = yield tsHelpers_1.isTypescriptProject(directory);
if (ts)
directory = yield tsHelpers_1.getTypescriptRoot(directory);
yield generateScript_1.generateScript(directory, name, ts);
}
});
})
//@ts-ignore
.command("update-chrome", "Downloads the recommended, latest or specified chrome revision", (_argv) => {
yargs_1.default
.option("revision", {
describe: "Download a specific revision. Format: '114.0.5735.133'",
type: "string",
alias: "r"
})
.option("stable", {
describe: "Download the latest stable revision",
type: "boolean"
})
.option("beta", {
describe: "Download the latest beta revision",
type: "boolean"
})
.option("dev", {
describe: "Download the latest dev revision",
type: "boolean"
})
.option("canary", {
describe: "Download the latest canary revision",
type: "boolean"
})
.epilogue("Learn more at https://ayakashi-io.github.io/docs/reference/cli-commands.html#update-chrome");
//@ts-ignore
}, function (argv) {
return __awaiter(this, void 0, void 0, function* () {
const storedRevision = yield chromium_1.getStoredRevision();
const options = { useExact: false, revision: "", useChannel: false, channel: "" };
if (argv.revision) {
options.revision = argv.revision;
options.useExact = true;
}
else if (argv.stable) {
options.channel = "Stable";
options.useChannel = true;
}
else if (argv.beta) {
options.channel = "Beta";
options.useChannel = true;
}
else if (argv.dev) {
options.channel = "Dev";
options.useChannel = true;
}
else if (argv.canary) {
options.channel = "Canary";
options.useChannel = true;
}
else {
options.revision = yield downloader_1.getRecommendedChromiumRevision();
options.useExact = true;
}
yield downloader_1.downloadChromium(options, storedRevision);
});
})
//@ts-ignore
.command("update-ua", "Updates the builtin database of user agent strings", (_argv) => {
yargs_1.default
.epilogue("Learn more at https://ayakashi-io.github.io/docs/installation#updating-subcomponents");
//@ts-ignore
}, function (argv) {
return __awaiter(this, void 0, void 0, function* () {
yield refreshUA_1.refreshUA();
});
})
//@ts-ignore
.command("update-stealth", "Updates the headless chrome stealth patches", (_argv) => {
yargs_1.default
.epilogue("Learn more at https://ayakashi-io.github.io/docs/installation#updating-subcomponents");
//@ts-ignore
}, function (argv) {
return __awaiter(this, void 0, void 0, function* () {
yield updateStealth_1.updateStealthPatches();
});
})
//@ts-ignore
.command("info", "System information", (_argv) => {
//@ts-ignore
}, function (argv) {
return __awaiter(this, void 0, void 0, function* () {
const opLog = opLog_1.getOpLog();
const storedRevision = yield chromium_1.getStoredRevision();
opLog.info(`Ayakashi version: ${packageJson.version}`);
if ((yield chromium_1.isChromiumAlreadyInstalled()) && (yield chromium_1.isCfT())) {
opLog.info(`Chrome revision: ${storedRevision}`);
}
else {
opLog.info(`Chrome revision: none`);
}
});
})
.demandCommand().recommendCommands().strict()
.epilogue("Learn more at https://ayakashi-io.github.io/docs/reference/cli-commands.html")
.argv;