@vorlefan/google-news
Version:
Crawler for the latest news from the Google News RSS feed
205 lines (164 loc) • 4.38 kB
JavaScript
"use strict";
// Flag the exports object with `__esModule`, the marker that interop helpers
// such as `_interopRequireDefault` check before wrapping a module.
Object.defineProperty(exports, "__esModule", {
value: true
});
// Declare the named export up front; the real value is assigned once the
// class has been defined further down in this file.
exports.GoogleNews = void 0;
// Supplies `Route`, used by the class for file storage, reads and downloads.
var _path = require("../path");
// Supplies `URL_GOOGLE_NEWS`, called with (search, language, localization).
var _constants = require("../config/constants");
// XML parser; its `toJson` is called with `{ object: true }` on the feed text.
var _xml2json = _interopRequireDefault(require("xml2json"));
// Supplies `WaitSec`, awaited before each crawl when a timeout is configured.
var _wait = require("../utils/wait");
/**
 * Normalises a required module so it can always be read through `.default`.
 *
 * @param {*} obj - The value returned by `require`.
 * @returns {*} `obj` itself when it is truthy and flagged `__esModule`,
 *   otherwise a fresh `{ default: obj }` wrapper.
 */
function _interopRequireDefault(obj) {
  if (obj && obj.__esModule) {
    return obj;
  }
  return { default: obj };
}
/*
:--------------------------------------------------------------------------
: Bootstrap
:--------------------------------------------------------------------------
*/
/**
 * Crawls the Google News RSS feed for one or more search terms.
 *
 * For every term the feed is downloaded to an XML file through the Route
 * object, read back, parsed with xml2json and collected in memory. Depending
 * on the options the XML file is then removed and/or a JSON copy is stored.
 */
class GoogleNews {
  /**
   * @constructor
   * @param {string|string[]} searchs - A single search term or a list of terms.
   * @param {Object} [options] - Overrides for the defaults listed below.
   * @param {boolean} [options.saveJson=true] - Store the parsed feed as JSON.
   * @param {boolean} [options.saveXml=true] - Keep the downloaded XML file.
   * @param {?number} [options.timeout=null] - Passed to `WaitSec` before each
   *   crawl (presumably seconds, going by the helper's name; confirm).
   * @param {string} [options.routeName='root'] - Route used for all file I/O.
   * @param {string} [options.language='en-US'] - Passed to `URL_GOOGLE_NEWS`.
   * @param {string} [options.localization='US'] - Passed to `URL_GOOGLE_NEWS`.
   * @param {Function} [options.route] - Called once with the Route object.
   */
  constructor(searchs, options) {
    this._route = _path.Route;
    // Copy the list: run() consumes the queue with shift(), and that must not
    // empty an array that still belongs to the caller.
    this._searchs = Array.isArray(searchs) ? [...searchs] : [searchs];
    this._options = Object.assign({
      saveJson: true,
      saveXml: true,
      timeout: null,
      routeName: 'root',
      language: 'en-US',
      localization: 'US'
    }, options || {});
    this._url = '';
    this._xml = '';
    this._filename = '';
    this._json = null;
    this._currentSearch = null;
    this._news = [];
    this._errors = [];
    if (typeof this._options.route === 'function') {
      this._options.route(this._route);
    }
  }

  /**
   * @function run
   * @description Crawls every queued search term in order, skipping falsy
   *   entries. Implemented as a loop so that long queues do not grow the call
   *   stack. A failed crawl does not stop the remaining ones.
   * @returns {Promise<null>} Always resolves to `null` once the queue is empty.
   */
  async run() {
    while (this._searchs.length > 0) {
      this._currentSearch = this._searchs.shift();
      if (!this._currentSearch) continue;
      const timeout = this._options.timeout;
      if (!!timeout && typeof timeout === 'number') {
        await (0, _wait.WaitSec)(timeout);
      }
      await this.crawler();
    }
    return null;
  }

  /**
   * @function news
   * @returns {Object[]} The parsed feeds collected so far, one per successful
   *   crawl, in crawl order.
   */
  news() {
    return this._news;
  }

  /**
   * @function errors
   * @returns {Array<{stage: string, search: *, error: *}>} Failures recorded
   *   by `crawler`, `json` and `clean`, which otherwise only return `false`.
   */
  errors() {
    return this._errors;
  }

  /**
   * @function items
   * @description Returns the item list of every collected feed.
   * @param {string} [orderBy] - Falsy: keep feed order. `'desc'`: newest
   *   `pubDate` first. Any other truthy value: oldest first.
   * @returns {Promise<Object[][]>} One array of items per collected feed. A
   *   feed with a single item yields a one-element array and a feed without
   *   items yields an empty array.
   */
  async items(orderBy) {
    const feeds = this.news().map(news => this._feedItems(news));
    if (!orderBy) return feeds;
    const newestFirst = orderBy === 'desc';
    // Sort a copy so the collected feeds keep their original order.
    return feeds.map(feed => [...feed].sort((a, b) => {
      const timeA = new Date(a.pubDate).getTime();
      const timeB = new Date(b.pubDate).getTime();
      return newestFirst ? timeB - timeA : timeA - timeB;
    }));
  }

  /**
   * Extracts `rss.channel.item` from a parsed feed as an array.
   * The parser yields a plain object when the channel holds a single item and
   * nothing at all when it holds none, so both cases are normalised here.
   *
   * @param {Object} news - One parsed feed.
   * @returns {Object[]} The feed items (possibly empty).
   */
  _feedItems(news) {
    const channel = news && news.rss && news.rss.channel;
    const item = channel ? channel.item : undefined;
    if (item === undefined || item === null) return [];
    return Array.isArray(item) ? item : [item];
  }

  /**
   * Remembers a failure so that best-effort steps do not hide it completely.
   *
   * @param {string} stage - Name of the method that failed.
   * @param {*} error - The caught value.
   */
  _recordError(stage, error) {
    this._errors.push({ stage, search: this._currentSearch, error });
  }

  /**
   * @function crawler
   * @description Runs the pipeline for the current search term:
   *   download, load, collect, clean, json.
   * @returns {Promise<boolean>} `true` when the pipeline ran through, `false`
   *   when there is no current term or a step threw (see `errors()`).
   */
  async crawler() {
    if (!this._currentSearch) return false;
    try {
      this._url = (0, _constants.URL_GOOGLE_NEWS)(this._currentSearch, this._options.language || 'en-US', this._options.localization || 'US');
      await this.dowload();
      await this.load();
      this.collect();
      await this.clean();
      await this.json();
      return true;
    } catch (error) {
      this._recordError('crawler', error);
      return false;
    }
  }

  /**
   * @function json
   * @description Stores the parsed feed next to the XML file when
   *   `options.saveJson` is exactly `true`.
   * @returns {Promise<boolean>} `false` only when storing failed.
   */
  async json() {
    if (this._options.saveJson !== true) return true;
    try {
      await this._route.json().set(this._options.routeName).store({
        // Swap only the trailing extension; a search term may itself contain
        // ".xml" earlier in the file name.
        filename: this._filename.replace(/\.xml$/, '.json'),
        data: this._json,
        force: true
      });
      return true;
    } catch (error) {
      this._recordError('json', error);
      return false;
    }
  }

  /**
   * @function clean
   * @description Removes the downloaded XML file when `options.saveXml` is
   *   falsy.
   * @returns {Promise<boolean>} `false` when nothing has been parsed yet or
   *   the removal failed.
   */
  async clean() {
    if (!this._json) return false;
    try {
      if (!this._options.saveXml) {
        await this._route.io().remove({
          routeName: this._options.routeName,
          filename: this._filename
        });
      }
      return true;
    } catch (error) {
      this._recordError('clean', error);
      return false;
    }
  }

  /**
   * @function collect
   * @description Appends the most recently parsed feed to the news list.
   */
  collect() {
    if (!this._json) return;
    this._news.push(this._json);
  }

  /**
   * @function load
   * @description Reads the downloaded XML file and parses it into an object.
   */
  async load() {
    const content = await this._route.io().read({
      routeName: this._options.routeName,
      filename: this._filename
    });
    this._xml = String(content);
    this._json = _xml2json.default.toJson(this._xml, {
      object: true
    });
  }

  /**
   * @function dowload
   * @description Downloads the feed at the current URL into a timestamped XML
   *   file on the configured route. The misspelled method name is kept because
   *   it is part of the class's existing interface.
   */
  async dowload() {
    if (!this._currentSearch) return;
    // Whitespace, path separators and characters reserved in file names all
    // become "_" so a search term can never point outside the route folder.
    const sanitizeSearch = String(this._currentSearch)
      .toLocaleLowerCase()
      .replace(/[\s\\/:*?"<>|]/g, '_');
    this._filename = `${sanitizeSearch}.${Date.now()}.xml`;
    const destination = this._route.plug(this._options.routeName, this._filename);
    await this._route.stream().download({
      url: this._url,
      destination,
      protocol: 'https'
    });
  }
}
// Publish the class as this module's named CommonJS export.
exports.GoogleNews = GoogleNews;