UNPKG

ayakashi

Version:

The next generation web scraping framework

90 lines (85 loc) 3.31 kB
"use strict";
// Down-level async helper (the shape the TypeScript compiler emits): drives a
// generator function, resolving each yielded value as a promise and feeding
// the settled result back into the generator.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// Interop helper: wraps a module without the __esModule marker so that its
// export object is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.generateRenderlessScraper = void 0;
const mkdirp_1 = __importDefault(require("mkdirp"));
const util_1 = require("util");
const path_1 = require("path");
const fs_1 = require("fs");
const opLog_1 = require("../../opLog/opLog");
// Promise-returning wrappers around the callback-style APIs used below.
const mkdirp = util_1.promisify(mkdirp_1.default);
const writeFile = util_1.promisify(fs_1.writeFile);
const exists = util_1.promisify(fs_1.exists);
/**
 * Creates a new renderless scraper file from a template at
 * `<directory>/scrapers/<name><ext>`.
 *
 * If the target file already exists, an error is logged through the op log
 * and nothing is written. Otherwise the `scrapers` folder is created when
 * needed, the template is written and an info message is logged.
 *
 * @param {string} directory - Folder that contains (or will contain) the `scrapers` folder.
 * @param {string} name - Scraper name, with or without the file extension.
 * @param {boolean} ts - When truthy, the TypeScript template and a `.ts` extension
 *   are used; otherwise the JavaScript template and `.js`.
 * @returns {Promise<void>}
 */
function generateRenderlessScraper(directory, name, ts) {
    return __awaiter(this, void 0, void 0, function* () {
        const opLog = opLog_1.getOpLog();
        const ext = ts ? ".ts" : ".js";
        // Only treat the extension as already present when the name actually
        // ends with it. A substring check would also match names such as
        // "foo.json" or "my.jsScraper" and write them without the extension.
        const fileName = name.endsWith(ext) ? name : `${name}${ext}`;
        const scrapersFolder = path_1.join(directory, "scrapers");
        const filePath = path_1.join(scrapersFolder, fileName);
        // Never overwrite an existing scraper.
        if (yield exists(filePath)) {
            opLog.error(`scraper <${name}> already exists in ${filePath}`);
            return;
        }
        yield mkdirp(scrapersFolder);
        const content = ts ? getContentTS() : getContent();
        yield writeFile(filePath, content);
        opLog.info(`Created <${name}> in ${filePath}`);
    });
}
exports.generateRenderlessScraper = generateRenderlessScraper;
/**
 * Returns the JavaScript (CommonJS) scraper template.
 *
 * NOTE(review): the template text below looks whitespace-collapsed in this
 * copy of the file; it is reproduced verbatim - confirm against the package
 * source before reformatting it.
 *
 * @returns {string}
 */
function getContent() {
    return (`/** * @param {import("@ayakashi/types").IRenderlessAyakashiInstance} ayakashi */ module.exports = async function(ayakashi, input, params) { await ayakashi.load(input.page); ayakashi .select("name") .where({itemprop: {eq: "name"}}); ayakashi .select("author") .where({itemprop: {eq: "author"}}); return { name: await ayakashi.extractFirst("name"), author: await ayakashi.extractFirst("author") }; }; `);
}
/**
 * Returns the TypeScript scraper template.
 *
 * NOTE(review): the template text below looks whitespace-collapsed in this
 * copy of the file; it is reproduced verbatim - confirm against the package
 * source before reformatting it.
 *
 * @returns {string}
 */
function getContentTS() {
    return (`import {IRenderlessAyakashiInstance} from "@ayakashi/types"; type ScraperInput = {page: string}; type ScraperParams = {}; export default async function(ayakashi: IRenderlessAyakashiInstance, input: ScraperInput, params: ScraperParams) { await ayakashi.load(input.page); ayakashi .select("name") .where({itemprop: {eq: "name"}}); ayakashi .select("author") .where({itemprop: {eq: "author"}}); return { name: await ayakashi.extractFirst("name"), author: await ayakashi.extractFirst("author") }; } `);
}