UNPKG

@11me/xparse

Version:

Simple html and rss parser

64 lines (63 loc) 2.89 kB
"use strict";
// Helper that runs a generator function as an asynchronous routine: each
// yielded value is adopted as a promise of type P (default: Promise), its
// settlement is fed back into the generator, and the returned promise
// resolves with the generator's return value or rejects with anything it throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.HTMLParser = void 0;
const cheerio = require("cheerio");
const base_parser_1 = require("./base-parser");
const helpers_1 = require("../helpers");
/**
 * Parser that reads a listing page, collects the links matched by a CSS
 * selector, then visits every linked page and extracts text from it using
 * caller-supplied selectors.
 */
class HTMLParser extends base_parser_1.BaseParser {
    /**
     * @param fetchProvider Forwarded unchanged to the BaseParser constructor.
     */
    constructor(fetchProvider) {
        super(fetchProvider);
    }
    /**
     * Scrape the listing page and every page it links to.
     *
     * @param {object} options
     * @param {object} options.description
     * @param {string} options.description.url URL of the listing page.
     * @param {*} options.description.creator Copied verbatim onto every result.
     * @param {string} options.description.page_selector Selector matching the
     *   link elements on the listing page; their `href` attributes are visited.
     * @param {object} options.options Maps an output field name to an object
     *   whose `selectors` array lists the selectors to read text from.
     * @returns {Promise<object[]>} One object per visited link, holding `link`,
     *   `creator`, `guid` and one property per key of `options.options`. The
     *   promise rejects as soon as any page fetch rejects (Promise.all).
     */
    parse(options) {
        return __awaiter(this, void 0, void 0, function* () {
            const from = options['description']['url'];
            const creator = options['description']['creator'];
            // `this.fetch` is not defined in this file; presumably inherited from BaseParser.
            const html = yield this.fetch(from);
            const $ = cheerio.load(html);
            const pages = [];
            // Collect every link target on the listing page.
            // NOTE(review): hrefs are used verbatim; relative links are not resolved
            // against `from` - confirm the fetch provider can handle them.
            $(options['description']['page_selector']).each((_, page) => {
                const href = $(page).attr('href');
                // Elements without a usable href would otherwise put `undefined`
                // into the list and make the whole batch below fail.
                if (href) {
                    pages.push(href);
                }
            });
            // Visit all pages concurrently.
            return Promise.all(pages.map((pageURL) => __awaiter(this, void 0, void 0, function* () {
                const guid = (0, helpers_1.hashCode)(pageURL);
                const result = { link: pageURL, creator, guid };
                const pageHtml = yield this.fetch(pageURL);
                const $page = cheerio.load(pageHtml);
                // For every requested field, concatenate the trimmed text of each
                // of its selectors, in the order the selectors are listed.
                for (const key of Reflect.ownKeys(options['options'])) {
                    const selectors = options['options'][key]['selectors'];
                    selectors.forEach((selector) => {
                        const text = $page(selector).text().trim();
                        result[key] = result[key] ? result[key] + text : text;
                    });
                }
                return result;
            })));
        });
    }
}
exports.HTMLParser = HTMLParser;