open-graph-scraper
Version:
Node.js scraper module for Open Graph and Twitter Card info
225 lines (224 loc) • 10.5 kB
JavaScript
/**
 * CommonJS/ES-module interop helper.
 *
 * Reuses an `__importDefault` already present on `this` when there is one;
 * otherwise falls back to the local implementation below, which returns the
 * required module untouched when it is flagged with `__esModule` and wraps
 * anything else as `{ default: module }`.
 */
var __importDefault = (this && this.__importDefault) || function (importedModule) {
    if (importedModule && importedModule.__esModule) {
        return importedModule;
    }
    return { "default": importedModule };
};
// Flag the exports object with `__esModule` so interop helpers (such as
// `__importDefault` in this file) treat it as an ES module and do not wrap it.
Object.defineProperty(exports, "__esModule", { value: true });
// Named export. `fallback` is a function declaration later in this file, so it
// is hoisted and already available at this point.
exports.fallback = fallback;
// `chardet` goes through `__importDefault`; it is used as `chardet_1.default.detect(...)`
// for charset detection inside `fallback`.
const chardet_1 = __importDefault(require("chardet"));
// Local helpers used by `fallback`: findImageTypeFromUrl, isUrlValid,
// isImageTypeValid and defaultUrlValidatorSettings.
const utils_1 = require("./utils");
/**
 * Checks whether `$(selector).attr(attribute)` yields a non-empty value.
 *
 * The result is meant to be used as a condition only: it is `true` for a
 * non-empty value and falsy otherwise (the missing/empty attribute value
 * itself is passed through, i.e. `undefined` or `''`).
 *
 * @param {string} selector - selector handed to `$`
 * @param {string} attribute - attribute name to read
 * @param {object} $ - cheerio.load() of the current html
 * @return {boolean|string|undefined} truthy only when the attribute has content
 */
const doesElementExist = (selector, attribute, $) => {
    const attributeValue = $(selector).attr(attribute);
    return attributeValue && (attributeValue?.length ?? 0) > 0;
};
/**
 * Attribute-based fallbacks that all share one shape: when `property` is not
 * yet set on the result and `key` is allowed to fall back, the first
 * `[selector, attribute]` candidate with a non-empty value wins.
 * Entries, and the candidates inside each entry, are tried in listed order.
 */
const ATTRIBUTE_FALLBACKS = [
    {
        key: 'locale',
        property: 'ogLocale',
        candidates: [
            ['html', 'lang'],
            ['head > meta[itemprop="inLanguage"]', 'content'],
        ],
    },
    {
        key: 'logo',
        property: 'ogLogo',
        candidates: [
            ['meta[itemprop="logo"]', 'content'],
            ['img[itemprop="logo"]', 'src'],
        ],
    },
    {
        key: 'url',
        property: 'ogUrl',
        candidates: [
            ['link[rel="canonical"]', 'href'],
            ['link[rel="alternate"][hreflang="x-default"]', 'href'],
        ],
    },
    {
        key: 'date',
        property: 'ogDate',
        candidates: [
            ['head > meta[name="date"]', 'content'],
            ['[itemprop*="datemodified" i]', 'content'],
            ['[itemprop="datepublished" i]', 'content'],
            ['[itemprop*="date" i]', 'content'],
            ['time[itemprop*="date" i]', 'datetime'],
            ['time[datetime]', 'datetime'],
        ],
    },
    {
        key: 'favicon',
        property: 'favicon',
        candidates: [
            ['link[rel="shortcut icon"]', 'href'],
            ['link[rel="icon"]', 'href'],
            ['link[rel="mask-icon"]', 'href'],
            ['link[rel="apple-touch-icon"]', 'href'],
            ['link[type="image/png"]', 'href'],
            ['link[type="image/ico"]', 'href'],
            ['link[type="image/x-icon"]', 'href'],
            ['head > meta[property*="appIcon"]', 'content'],
        ],
    },
];
/**
 * Returns the value of the first `[selector, attribute]` candidate that has a
 * non-empty attribute, or `undefined` when none of them does.
 *
 * @param {object} $ - cheerio.load() of the current html
 * @param {Array<Array<string>>} candidates - `[selector, attribute]` pairs, tried in order
 * @return {string|undefined} the first non-empty attribute value
 */
function firstAttributeValue($, candidates) {
    for (const [selector, attribute] of candidates) {
        if (doesElementExist(selector, attribute, $)) {
            return $(selector).attr(attribute);
        }
    }
    return undefined;
}
/**
 * Returns the text of the first selector whose `.text()` is non-empty, or
 * `undefined` when none of them has text.
 *
 * @param {object} $ - cheerio.load() of the current html
 * @param {Array<string>} selectors - selectors, tried in order
 * @return {string|undefined} the first non-empty text
 */
function firstTextValue($, selectors) {
    for (const selector of selectors) {
        const text = $(selector).text();
        if (text && text.length > 0) {
            return text;
        }
    }
    return undefined;
}
/**
 * Builds fallback image entries from the `<img>` elements of the page.
 * An image is kept when it has a `src`, the URL passes `isUrlValid` and its
 * type (derived from the URL) passes `isImageTypeValid`. Numeric, non-zero
 * `width`/`height` attributes are copied over. Scanning stops as soon as
 * `limit` images have been collected.
 *
 * @param {object} $ - cheerio.load() of the current html
 * @param {object} options - options for ogs (reads `urlValidatorSettings`)
 * @param {number} limit - maximum number of images to return
 * @return {Array<object>} the collected image objects, in document order
 */
function collectFallbackImages($, options, limit) {
    const { findImageTypeFromUrl, isUrlValid, isImageTypeValid, defaultUrlValidatorSettings, } = utils_1;
    const urlValidatorSettings = options.urlValidatorSettings ?? defaultUrlValidatorSettings;
    const images = [];
    for (const imageElement of $('img').toArray()) {
        if (images.length >= limit) {
            break;
        }
        const image = $(imageElement);
        const source = image.attr('src') ?? '';
        if (!source) {
            continue;
        }
        const type = findImageTypeFromUrl(source);
        if (!isUrlValid(source, urlValidatorSettings) || !isImageTypeValid(type)) {
            continue;
        }
        const fallbackImage = { url: source, type };
        // Number() turns a missing or empty attribute into NaN/0, both falsy,
        // so only real non-zero dimensions are recorded.
        const width = Number(image.attr('width'));
        if (width) {
            fallbackImage.width = width;
        }
        const height = Number(image.attr('height'));
        if (height) {
            fallbackImage.height = height;
        }
        images.push(fallbackImage);
    }
    return images;
}
/**
 * ogs fallbacks
 *
 * Fills in values that the Open Graph tags did not provide (title,
 * description, images, audio, locale, logo, url, date, favicon) from regular
 * HTML, and always tries to determine the charset. `ogObject` is mutated in
 * place and also returned.
 *
 * `options.onlyGetOpenGraphInfo` controls the fallbacks: a falsy value allows
 * all of them, `true` disables all of them, and an array disables only the
 * listed keys. The charset lookup is not affected by this option.
 *
 * @param {object} ogObject - the current ogObject
 * @param {object} options - options for ogs
 * @param {object} $ - cheerio.load() of the current html
 * @param {string} [body] - raw html; only used to guess the charset when no meta tag declares one
 * @return {object} object with ogs results with updated fallback values
 *
 */
function fallback(ogObject, options, $, body) {
    const shouldFallback = (key) => {
        const { onlyGetOpenGraphInfo } = options;
        if (!onlyGetOpenGraphInfo) {
            return true;
        }
        if (onlyGetOpenGraphInfo === true) {
            return false;
        }
        return !onlyGetOpenGraphInfo.includes(key);
    };
    // title fallback
    if (!ogObject.ogTitle && shouldFallback('title')) {
        // Test the same value that gets stored: only the first <title> counts,
        // so an empty first <title> falls through to the next source instead
        // of being stored as ''. `||` is intentional here (empty strings must
        // fall through).
        const title = $('title').first().text()
            || $('head > meta[name="title"]').attr('content')
            || firstTextValue($, [
                '.post-title',
                '.entry-title',
                'h1[class*="title" i] a',
                'h1[class*="title" i]',
            ]);
        if (title) {
            ogObject.ogTitle = title;
        }
    }
    // Get meta description tag if og description was not provided
    if (!ogObject.ogDescription && shouldFallback('description')) {
        const description = firstAttributeValue($, [
            ['head > meta[name="description"]', 'content'],
            ['head > meta[itemprop="description"]', 'content'],
        ]) ?? firstTextValue($, ['#description']);
        if (description) {
            ogObject.ogDescription = description;
        }
    }
    // Get all of images if there is no og:image info
    if (!ogObject.ogImage && shouldFallback('image')) {
        const images = collectFallbackImages($, options, 10);
        if (images.length > 0) {
            ogObject.ogImage = images;
        }
        else {
            delete ogObject.ogImage;
        }
    }
    else if (ogObject.ogImage) {
        // og:image entries exist: only back-fill a missing type from the URL.
        ogObject.ogImage.forEach((image) => {
            if (image.url && !image.type) {
                const type = (0, utils_1.findImageTypeFromUrl)(image.url);
                if ((0, utils_1.isImageTypeValid)(type)) {
                    image.type = type;
                }
            }
        });
    }
    // audio fallback: <audio src> wins over <audio><source src>
    if (!ogObject.ogAudioURL && !ogObject.ogAudioSecureURL && shouldFallback('audioUrl')) {
        for (const selector of ['audio', 'audio > source']) {
            if (!doesElementExist(selector, 'src', $)) {
                continue;
            }
            const audioSource = $(selector).attr('src') ?? '';
            if (audioSource.startsWith('https')) {
                ogObject.ogAudioSecureURL = audioSource;
            }
            else {
                ogObject.ogAudioURL = audioSource;
            }
            if (!ogObject.ogAudioType && doesElementExist(selector, 'type', $)) {
                ogObject.ogAudioType = $(selector).attr('type') ?? '';
            }
            break;
        }
    }
    // locale, logo, url, date and favicon fallbacks
    for (const { key, property, candidates } of ATTRIBUTE_FALLBACKS) {
        if (ogObject[property] || !shouldFallback(key)) {
            continue;
        }
        const value = firstAttributeValue($, candidates);
        if (value) {
            ogObject[property] = value;
        }
    }
    // set the charset
    // `meta[charset]` (rather than `meta`) is required because `.attr()` is
    // read from the first matched element only, and the charset tag is not
    // necessarily the first <meta> of the document.
    const declaredCharset = firstAttributeValue($, [
        ['meta[charset]', 'charset'],
        ['head > meta[name="charset"]', 'content'],
    ]);
    if (declaredCharset) {
        ogObject.charset = declaredCharset;
    }
    else if (doesElementExist('head > meta[http-equiv="content-type"]', 'content', $)) {
        // When this tag exists but carries no charset, the charset is left
        // untouched and no detection on `body` is attempted.
        const content = $('head > meta[http-equiv="content-type"]').attr('content') ?? '';
        const charsetMatch = /charset=([^()<>@,;:"/[\]?.=\s]*)/i.exec(content);
        if (charsetMatch?.[1]) {
            ogObject.charset = charsetMatch[1];
        }
    }
    else if (body) {
        const encoder = new TextEncoder();
        const uint8Array = encoder.encode(body);
        ogObject.charset = chardet_1.default.detect(uint8Array) ?? '';
    }
    return ogObject;
}
// Also expose `fallback` as the module's default export, in addition to the
// named `exports.fallback`.
exports.default = fallback;
;