ayakashi
Version:
The next generation web scraping framework
90 lines (85 loc) • 3.31 kB
JavaScript
"use strict";
// Async/await helper: reuses an `__awaiter` already present on `this`, otherwise defines one.
// It drives a generator function to completion and returns a promise for its final value.
//   thisArg, _arguments - receiver and argument list the generator function is applied with
//   P                   - promise constructor to use; defaults to the global Promise
//   generator           - generator function whose yielded values are waited on
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wraps a yielded value in a P promise unless it already is an instance of P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resumes the generator with a resolved value; anything it throws rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throws a rejection reason into the generator; anything it throws rejects the outer promise.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Resolves with the final value once the generator is done, otherwise waits on the yielded value.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (with no arguments when none were supplied) and start it.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
// Default-import interop helper. An `__importDefault` already present on `this`
// takes precedence; otherwise modules flagged with `__esModule` are returned
// untouched and any other value is wrapped as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateRenderlessScraper = void 0;
const mkdirp_1 = __importDefault(require("mkdirp"));
const util_1 = require("util");
const path_1 = require("path");
const fs_1 = require("fs");
const opLog_1 = require("../../opLog/opLog");
// Promise-returning wrappers around the callback-style mkdirp, fs.writeFile and
// fs.exists, so they can be waited on from the generator-based async code below.
const mkdirp = util_1.promisify(mkdirp_1.default);
const writeFile = util_1.promisify(fs_1.writeFile);
// NOTE(review): the Node.js docs mark fs.exists as deprecated — consider fs.access; confirm before changing.
const exists = util_1.promisify(fs_1.exists);
/**
 * Creates a renderless scraper file from the built-in template inside
 * `<directory>/scrapers`.
 *
 * Nothing is written when a file already exists at the target path; an error
 * is reported through the op log instead and the returned promise resolves.
 *
 * @param {string} directory - directory that holds (or will hold) the `scrapers` folder
 * @param {string} name - scraper name, with or without the file extension
 * @param {boolean} ts - when truthy a `.ts` scraper is generated, otherwise a `.js` one
 * @returns {Promise<void>} resolves once the file is written or the request is refused
 */
function generateRenderlessScraper(directory, name, ts) {
    return __awaiter(this, void 0, void 0, function* () {
        const opLog = opLog_1.getOpLog();
        const ext = ts ? ".ts" : ".js";
        // Only a trailing extension counts. A substring check would treat a name
        // such as "my.jsonFeed" as already carrying ".js" and produce a file
        // without any extension.
        const fileName = name.endsWith(ext) ? name : `${name}${ext}`;
        const scrapersFolder = path_1.join(directory, "scrapers");
        const filePath = path_1.join(scrapersFolder, fileName);
        // Refuse to overwrite an existing scraper.
        if (yield exists(filePath)) {
            opLog.error(`scraper <${name}> already exists in ${filePath}`);
            return;
        }
        // Make sure the scrapers folder is there before writing into it.
        yield mkdirp(scrapersFolder);
        const content = ts ? getContentTS() : getContent();
        yield writeFile(filePath, content);
        opLog.info(`Created <${name}> in ${filePath}`);
    });
}
exports.generateRenderlessScraper = generateRenderlessScraper;
/**
 * Builds the source text of the JavaScript scraper template.
 *
 * @returns {string} the template, terminated by a newline
 */
function getContent() {
    // One entry per template line; the trailing empty entry yields the final newline.
    const templateLines = [
        '/**',
        '* @param {import("@ayakashi/types").IRenderlessAyakashiInstance} ayakashi',
        '*/',
        'module.exports = async function(ayakashi, input, params) {',
        'await ayakashi.load(input.page);',
        'ayakashi',
        '.select("name")',
        '.where({itemprop: {eq: "name"}});',
        'ayakashi',
        '.select("author")',
        '.where({itemprop: {eq: "author"}});',
        'return {',
        'name: await ayakashi.extractFirst("name"),',
        'author: await ayakashi.extractFirst("author")',
        '};',
        '};',
        ''
    ];
    return templateLines.join("\n");
}
/**
 * Builds the source text of the TypeScript scraper template.
 *
 * @returns {string} the template, terminated by a newline
 */
function getContentTS() {
    // Header: type import and the input/params type aliases.
    const header = [
        'import {IRenderlessAyakashiInstance} from "@ayakashi/types";',
        'type ScraperInput = {page: string};',
        'type ScraperParams = {};'
    ];
    // Body: the default-exported scraper function.
    const scraper = [
        'export default async function(ayakashi: IRenderlessAyakashiInstance, input: ScraperInput, params: ScraperParams) {',
        'await ayakashi.load(input.page);',
        'ayakashi',
        '.select("name")',
        '.where({itemprop: {eq: "name"}});',
        'ayakashi',
        '.select("author")',
        '.where({itemprop: {eq: "author"}});',
        'return {',
        'name: await ayakashi.extractFirst("name"),',
        'author: await ayakashi.extractFirst("author")',
        '};',
        '}'
    ];
    // Every line, including the last, ends with a newline.
    return header.concat(scraper).map(function (line) { return line + "\n"; }).join("");
}