scrape-meta
Version:
A library to easily scrape metadata from an article on the web using Open Graph metadata, regular HTML metadata, and a series of fallbacks.
57 lines (52 loc) • 1.67 kB
JavaScript
;
var isUrl = require('is-url');
var utils = require('../utils');

/**
 * Wrap a rule with validation and formatting logic.
 *
 * The wrapped function calls `rule($)` and then:
 *   - returns the value unchanged when `isUrl` accepts it;
 *   - otherwise, when the value is a non-blank string and `sourceUrl` is
 *     truthy, returns `utils.resolveImagePath(sourceUrl, value)`;
 *   - otherwise returns `undefined`.
 *
 * @param {Function} rule - called with `$` (presumably a cheerio-style
 *   selector function — confirm against the caller); expected to return a
 *   string or `undefined`.
 * @return {Function} wrapped - a function taking `($, sourceUrl)`.
 */
function wrap(rule) {
  return function ($, sourceUrl) {
    var value = rule($);

    if (isUrl(value)) {
      return value;
    }

    // A blank attribute (e.g. content="") carries no image reference.
    // Resolving it against sourceUrl would presumably just hand back the
    // page URL itself (resolveImagePath is defined elsewhere — confirm),
    // so treat it as "no value" instead of reporting a bogus image.
    if (typeof value !== 'string' || value.trim() === '') {
      return undefined;
    }

    if (!sourceUrl) {
      return undefined;
    }

    return utils.resolveImagePath(sourceUrl, value);
  };
}
/**
* Rules.
*/
module.exports = [wrap(function ($) {
return $('meta[property="og:image:secure_url"]').attr('content');
}), wrap(function ($) {
return $('meta[property="og:image:url"]').attr('content');
}), wrap(function ($) {
return $('meta[property="og:image"]').attr('content');
}), wrap(function ($) {
return $('meta[name="twitter:image"]').attr('content');
}), wrap(function ($) {
return $('meta[property="twitter:image"]').attr('content');
}), wrap(function ($) {
return $('meta[name="twitter:image:src"]').attr('content');
}), wrap(function ($) {
return $('meta[property="twitter:image:src"]').attr('content');
}), wrap(function ($) {
return $('meta[name="sailthru.image"]').attr('content');
}), wrap(function ($) {
return $('meta[name="sailthru.image.full"]').attr('content');
}), wrap(function ($) {
return $('meta[name="sailthru.image.thumb"]').attr('content');
}), wrap(function ($) {
return $('article img[src]').first().attr('src');
}), wrap(function ($) {
return $('#content img[src]').first().attr('src');
}), wrap(function ($) {
return $('[class*="article"] img[src]').first().attr('src');
}), wrap(function ($) {
return $('img[src]').first().attr('src');
})];