article-parser
Version:
Extract clean article data from given URL.
84 lines (69 loc) • 1.82 kB
JavaScript
// utiles/extractWithEmbedly
var debug = require('debug');
var error = debug('artparser:error');
var info = debug('artparser:info');
var {
EmbedlyKey
} = require('../config');
var {
isValidURL
} = require('../utils');
var loadJSON = require('../utils/loadJSON');
/**
 * Join the `name` of every author entry into one comma-separated string.
 * Entries without a usable name are skipped so the result never contains
 * empty segments such as "Ann, , Bob".
 *
 * @param {Array<Object>} authors - author entries taken from the API response
 * @returns {string} the joined names, or '' when there are none
 */
const joinAuthorNames = (authors) => {
  return authors
    .map((entry) => (entry && entry.name) || '')
    .filter((name) => name !== '')
    .join(', ');
};

/**
 * Pick the URL of the image covering the largest pixel area.
 * Comparing by area keeps the choice independent of the order of the list.
 * When no image reports usable dimensions, the first image that has a URL
 * is used instead.
 *
 * @param {Array<Object>} images - image entries taken from the API response
 * @returns {string} the chosen image URL, or '' when none is available
 */
const pickLargestImage = (images) => {
  const candidates = images.filter((img) => img && img.url);
  let bestUrl = '';
  let bestArea = 0;
  candidates.forEach((img) => {
    const area = (Number(img.width) || 0) * (Number(img.height) || 0);
    if (area > bestArea) {
      bestArea = area;
      bestUrl = img.url;
    }
  });
  if (!bestUrl && candidates.length) {
    bestUrl = candidates[0].url;
  }
  return bestUrl;
};

/**
 * Fetch article data for `url` from the Embedly "extract" endpoint and
 * reshape the response into a flat article object.
 *
 * @param {string} url - address of the article to parse
 * @param {string} [key=''] - Embedly API key; falls back to `EmbedlyKey`
 *   from the config module when empty
 * @returns {Promise<Object>} resolves with
 *   `{url, title, description, author, source, image, content}`; rejects with
 *   `Error('Invalid URL')`, `Error("Missing Embedly's key")`, or whatever
 *   error the request/normalization step produced
 */
const extractWithEmbedly = (url, key = '') => {
  // Starting from a resolved promise turns every synchronous throw below
  // into a rejection, so callers only ever have to handle a promise.
  return Promise.resolve().then(() => {
    info(`Start parsing with Embedly...`);
    info(url);

    if (!isValidURL(url)) {
      throw new Error('Invalid URL');
    }

    const apiKey = key || EmbedlyKey || '';
    if (!apiKey) {
      throw new Error(`Missing Embedly's key`);
    }

    // Both query values are encoded so reserved characters cannot break
    // the query string.
    // NOTE(review): the key is sent over plain HTTP; consider switching to
    // https once it is confirmed that loadJSON supports it.
    const query = `key=${encodeURIComponent(apiKey)}&url=${encodeURIComponent(url)}&format=json`;
    const target = `http://api.embed.ly/1/extract?${query}`;

    return loadJSON(target).then((o) => {
      info(`Standalizing data structure...`);

      const authors = Array.isArray(o.authors) ? o.authors : [];
      const images = Array.isArray(o.images) ? o.images : [];

      const article = {
        url: o.url,
        title: o.title,
        description: o.description,
        author: joinAuthorNames(authors),
        source: o.provider_name || '',
        image: pickLargestImage(images),
        content: o.content
      };

      info(`Finish parsing with Embedly.`);
      return article;
    }).catch((err) => {
      // Only request/normalization failures are logged here; the original
      // error is rethrown untouched for the caller.
      error('Error while parsing with Embedly');
      throw err;
    });
  });
};
// Expose the extractor function as this module's only export.
module.exports = extractWithEmbedly;