UNPKG

@vorlefan/google-news

Version:

Crawler for the latest news from the Google News RSS feed

205 lines (164 loc) 4.38 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.GoogleNews = void 0; var _path = require("../path"); var _constants = require("../config/constants"); var _xml2json = _interopRequireDefault(require("xml2json")); var _wait = require("../utils/wait"); function _interopRequireDefault(obj) { return obj && obj.__esModule ? obj : { default: obj }; } /* :-------------------------------------------------------------------------- : Bootstrap :-------------------------------------------------------------------------- */ class GoogleNews { /** * @constructure */ constructor(searchs, options) { this._route = _path.Route; this._searchs = Array.isArray(searchs) ? searchs : [searchs]; this._options = Object.assign({ saveJson: true, saveXml: true, timeout: null, routeName: 'root', language: 'en-US', localization: 'US' }, options || {}); this._url = ''; this._xml = ''; this._filename = ''; this._news = []; if (!!this._options.route && typeof this._options.route === 'function') { this._options.route(this._route); } } /** * @function run * @description recursive runs the crawler */ async run() { if (this._searchs.length === 0) return null; this._currentSearch = this._searchs.shift(); if (!this._currentSearch) return await this.run(); if (!!this._options.timeout && typeof this._options.timeout === 'number') { await (0, _wait.WaitSec)(this._options.timeout); } await this.crawler(); return await this.run(); } /** * @function news */ news() { return this._news; } /** * @function news * @description get the news */ async items(orderBy) { const items = this.news().map(news => news.rss.channel.item); if (!orderBy) return items; const orderedItems = []; await Promise.allSettled(items.map(async item => { const news = item.sort((a, b) => { const timeB = new Date(b.pubDate).getTime(); const timeA = new Date(a.pubDate).getTime(); return orderBy === 'desc' ? 
timeB - timeA : timeA - timeB; }); orderedItems.push(news); })); return orderedItems; } /** * @function crawler */ async crawler() { if (!this._currentSearch) return false; try { this._url = (0, _constants.URL_GOOGLE_NEWS)(this._currentSearch, this._options.language || 'en-US', this._options.localization || 'US'); await this.dowload(); await this.load(); this.collect(); await this.clean(); await this.json(); return true; } catch (error) { return false; } } /** * @function json */ async json() { try { if (!!this._options.saveJson && this._options.saveJson === true) { await this._route.json().set(this._options.routeName).store({ filename: this._filename.replace('.xml', '.json'), data: this._json, force: true }); } return true; } catch (error) { return false; } } /** * @function clean */ async clean() { if (!this._json) return false; try { if (!this._options.saveXml) { await this._route.io().remove({ routeName: this._options.routeName, filename: this._filename }); } return true; } catch (error) { return false; } } /** * @function collect */ collect() { if (!this._json) return; this._news.push(this._json); } /** * @function load */ async load() { const content = await this._route.io().read({ routeName: this._options.routeName, filename: this._filename }); this._xml = String(content); const data = _xml2json.default.toJson(this._xml, { object: true }); this._json = data; } /** * @function download */ async dowload() { if (!this._currentSearch) return; const sanitizeSearch = this._currentSearch.toLocaleLowerCase().replace(/\s/gm, '_'); this._filename = `${sanitizeSearch}.${Date.now()}.xml`; const destination = this._route.plug(this._options.routeName, this._filename); await _path.Route.stream().download({ url: this._url, destination, protocol: 'https' }); } } exports.GoogleNews = GoogleNews;