@11me/xparse
Version:
Simple html and rss parser
64 lines (63 loc) • 2.89 kB
JavaScript
;
/**
 * Runs a generator function to completion, treating every yielded value as
 * something to wait on, and exposes the outcome as a promise.
 *
 * An already-defined `this.__awaiter` is reused when present.
 *
 * @param thisArg - `this` value the generator function is invoked with.
 * @param _arguments - argument list for the generator function (defaults to none).
 * @param P - promise constructor to use; the global `Promise` when falsy.
 * @param generator - generator function whose yields are awaited.
 * @returns a `P` instance that resolves with the generator's return value, or
 *          rejects with any error that escapes the generator.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Values that are not already instances of P get wrapped in a resolved P.
    const toPromise = (value) => {
        if (value instanceof P) {
            return value;
        }
        return new P((settle) => { settle(value); });
    };
    return new P((resolve, reject) => {
        // Either finish with the generator's return value or wait on the yielded one.
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            toPromise(result.value).then(onFulfilled, onRejected);
        };
        // Builds a callback that re-enters the generator through `method`
        // ("next" or "throw"); anything thrown along the way rejects the promise.
        const resumeWith = (method) => (value) => {
            try {
                step(generator[method](value));
            }
            catch (error) {
                reject(error);
            }
        };
        const onFulfilled = resumeWith("next");
        const onRejected = resumeWith("throw");
        // From here on `generator` refers to the instantiated iterator.
        generator = generator.apply(thisArg, _arguments || []);
        step(generator.next());
    });
};
// Define `__esModule: true` on the exports object (the flag TypeScript-style
// CommonJS output conventionally uses for ES-module interop).
Object.defineProperty(exports, "__esModule", { value: true });
// Declare the HTMLParser export up front as undefined; the class itself is
// assigned to it at the end of the file.
exports.HTMLParser = void 0;
const cheerio = require("cheerio");
const base_parser_1 = require("./base-parser");
const helpers_1 = require("../helpers");
/**
 * Parser that loads a listing page, collects the links matched by a selector,
 * then visits every linked page and extracts text for the requested fields.
 */
class HTMLParser extends base_parser_1.BaseParser {
    /**
     * @param fetchProvider - passed through unchanged to the BaseParser constructor.
     */
    constructor(fetchProvider) {
        super(fetchProvider);
    }
    /**
     * Scrapes the listing page and every page it links to.
     *
     * @param options - parse configuration:
     *   - `options.description.url`: address of the listing page.
     *   - `options.description.creator`: value copied into every result.
     *   - `options.description.page_selector`: selector for the link elements
     *     whose `href` points at a page to visit.
     *   - `options.options`: object mapping a result field name to
     *     `{ selectors: [...] }`; the trimmed text of every selector is
     *     concatenated, in order, into that field.
     * @returns a promise resolving to one object per visited page, holding
     *   `link`, `creator`, `guid` (from `hashCode(link)`) and the extracted
     *   fields. It rejects as soon as any page fetch fails.
     */
    parse(options) {
        return __awaiter(this, void 0, void 0, function* () {
            const description = options['description'];
            const from = description['url'];
            const creator = description['creator'];
            // `fetch` is not defined in this class; presumably inherited from BaseParser.
            const listingHtml = yield this.fetch(from);
            const $listing = cheerio.load(listingHtml);
            const pages = [];
            // get all news links on the page
            $listing(description['page_selector']).each((_, page) => {
                const href = $listing(page).attr('href');
                // Matched elements without a usable href would otherwise be
                // hashed and fetched as `undefined`, failing the whole batch.
                if (href !== undefined && href !== '') {
                    pages.push(href);
                }
            });
            // visit each page; all pages are requested concurrently
            return Promise.all(pages.map((pageURL) => __awaiter(this, void 0, void 0, function* () {
                const result = {
                    link: pageURL,
                    creator,
                    guid: (0, helpers_1.hashCode)(pageURL)
                };
                // extract html from the page
                const pageHtml = yield this.fetch(pageURL);
                const $page = cheerio.load(pageHtml);
                const fields = options['options'];
                // get requested content via selector
                for (const key of Reflect.ownKeys(fields)) {
                    fields[key]['selectors'].forEach((selector) => {
                        const text = $page(selector).text().trim();
                        // Append to whatever the field already holds. This also
                        // applies to the preset link/creator/guid values when a
                        // field shares one of those names.
                        result[key] = result[key] ? result[key] + text : text;
                    });
                }
                return result;
            })));
        });
    }
}
exports.HTMLParser = HTMLParser;