article-parser
Version:
Extract clean article data from given URL.
58 lines (48 loc) • 993 B
JavaScript
/**
* Article parser
* @ndaidong
**/
global.Promise = require('promise-wtf');
var {
md5
} = require('bellajs');
var {
fetchOptions,
configure,
getConfig
} = require('./config');
var {
isValidURL,
removeUTM,
cache,
loadHTML
} = require('./utils');
var extractWithEmbedly = require('./parsers/extractWithEmbedly');
var parse = require('./parsers');
/**
 * Load a page and extract its article, reusing the cache when possible.
 *
 * The URL is checked with `isValidURL`, passed through `removeUTM`, and the
 * `md5` of that result is used as the cache key. On a cache miss the HTML is
 * fetched with `loadHTML(url, fetchOptions)`, handed to `parse`, and the
 * parsed result is written to the cache before it is returned.
 *
 * @param {string} [url=''] - Address of the page to extract.
 * @returns {Promise<*>} Resolves with the truthy cached value, or with the
 *   result of `parse`. Rejects with `Error('Invalid URL')` when validation
 *   fails, or with whatever error loading, parsing or caching raised.
 */
const extract = (url = '') => {
  // The executor is kept only so that a synchronous throw from validation or
  // from any helper surfaces as a rejection instead of escaping to the caller.
  return new Promise((resolve) => {
    if (!isValidURL(url)) {
      throw new Error('Invalid URL');
    }

    const cleanUrl = removeUTM(url);
    const id = md5(cleanUrl);

    // A falsy cache entry is treated as a miss.
    const stored = cache.get(id);
    if (stored) {
      return resolve(stored);
    }

    // Resolving with the chain lets the outer promise adopt its outcome, so
    // no manual resolve/reject forwarding is needed.
    const pending = loadHTML(cleanUrl, fetchOptions)
      .then((html) => parse({url: cleanUrl, html}))
      .then((article) => {
        cache.set(id, article);
        return article;
      });

    return resolve(pending);
  });
};
// Public interface of this module: the two config helpers required from
// './config', the `extract` function defined in this file, and the parser
// required from './parsers/extractWithEmbedly'.
module.exports = {configure, getConfig, extract, extractWithEmbedly};