open-graph-scraper
Version:
Node.js scraper module for Open Graph and Twitter Card info
130 lines (118 loc) • 5.8 kB
JavaScript
const { findImageTypeFromUrl, isImageTypeValid } = require('./utils');
/**
 * Reports whether the first element matched by `selector` has a non-empty
 * value for `attribute`.
 *
 * The selection is queried once and the result is always a strict boolean
 * (never `undefined` or an empty string), so it is safe to store or compare.
 *
 * @param {*} selector - anything `$` accepts: a selector string or an element
 * @param {string} attribute - attribute name to read
 * @param {Function} $ - query function whose result exposes `.attr(name)`
 * @returns {boolean} `true` when the attribute is present and not empty
 */
const doesElementExist = (selector, attribute, $) => {
  const value = $(selector).attr(attribute);
  return Boolean(value) && value.length > 0;
};
/**
 * Returns the text of the first element of the first selector (tried in
 * order) whose first match has non-empty text, or `undefined` if none does.
 *
 * Only the FIRST match of each selector is read: calling `.text()` on a
 * multi-element selection concatenates the text of every match, which glues
 * unrelated strings together (e.g. the page `<title>` followed by the
 * `<title>` of inline SVG icons).
 */
const findFirstText = (selectors, $) => {
  const hit = selectors.find((selector) => Boolean($(selector).first().text()));
  return hit === undefined ? undefined : $(hit).first().text();
};

/**
 * Returns the attribute value of the first `[selector, attribute]` pair
 * (tried in order) for which `doesElementExist` succeeds, or `undefined`.
 */
const findFirstAttribute = (candidates, $) => {
  const hit = candidates.find(([selector, attribute]) => doesElementExist(selector, attribute, $));
  return hit === undefined ? undefined : $(hit[0]).attr(hit[1]);
};

/**
 * Fills in Open Graph fields that are still missing from `ogObject` by looking
 * at ordinary page markup (title, description, images, audio, locale, logo,
 * canonical url, date). Fields that already hold a truthy value are left alone.
 *
 * @param {object} ogObject - result object so far; it is mutated in place
 * @param {object} options - only `options.ogImageFallback` is read here; when
 *   truthy, every `<img>` with a valid image type is collected as `ogImage`
 * @param {Function} $ - DOM query function (presumably a loaded cheerio
 *   instance - confirm against the caller)
 * @returns {object} the same `ogObject` that was passed in
 */
const fallback = (ogObject, options, $) => {
  // title fallback
  if (!ogObject.ogTitle) {
    const title = findFirstText(['title'], $)
      || findFirstAttribute([['head > meta[name="title"]', 'content']], $)
      || findFirstText([
        '.post-title',
        '.entry-title',
        'h1[class*="title" i] a',
        'h1[class*="title" i]',
      ], $);
    if (title) ogObject.ogTitle = title;
  }

  // Get meta description tag if og description was not provided
  if (!ogObject.ogDescription) {
    const description = findFirstAttribute([
      ['head > meta[name="description"]', 'content'],
      ['head > meta[itemprop="description"]', 'content'],
    ], $) || findFirstText(['#description'], $);
    if (description) ogObject.ogDescription = description;
  }

  // Get all of images if there is no og:image info
  if (!ogObject.ogImage && options.ogImageFallback) {
    const images = [];
    // `.each` is used for the side effect; the callback must never return
    // `false`, because that would stop the iteration early.
    $('img').each((index, imageElement) => {
      const image = $(imageElement);
      const url = image.attr('src');
      if (!url) return;
      const type = findImageTypeFromUrl(url);
      if (!isImageTypeValid(type)) return;
      images.push({
        url,
        width: image.attr('width') || null,
        height: image.attr('height') || null,
        type,
      });
    });
    if (images.length > 0) {
      ogObject.ogImage = images;
    } else {
      // keep the key absent rather than leaving an empty/falsy placeholder
      delete ogObject.ogImage;
    }
  } else if (ogObject.ogImage && ogObject.ogImage.url && !ogObject.ogImage.type) {
    // if there isn't a type, try to pull it from the URL
    const type = findImageTypeFromUrl(ogObject.ogImage.url);
    if (isImageTypeValid(type)) ogObject.ogImage.type = type;
  }

  // audio fallback: prefer <audio src>, then the first <audio><source src>
  if (!ogObject.ogAudioURL && !ogObject.ogAudioSecureURL) {
    const audioSelector = ['audio', 'audio > source']
      .find((selector) => doesElementExist(selector, 'src', $));
    if (audioSelector) {
      const audioUrl = $(audioSelector).attr('src');
      if (audioUrl.startsWith('https')) {
        ogObject.ogAudioSecureURL = audioUrl;
      } else {
        ogObject.ogAudioURL = audioUrl;
      }
      // the type is taken from the same element that supplied the url
      if (!ogObject.ogAudioType && doesElementExist(audioSelector, 'type', $)) {
        ogObject.ogAudioType = $(audioSelector).attr('type');
      }
    }
  }

  // locale fallback
  if (!ogObject.ogLocale) {
    const locale = findFirstAttribute([
      ['html', 'lang'],
      ['head > meta[itemprop="inLanguage"]', 'content'],
    ], $);
    if (locale) ogObject.ogLocale = locale;
  }

  // logo fallback
  if (!ogObject.ogLogo) {
    const logo = findFirstAttribute([
      ['meta[itemprop="logo"]', 'content'],
      ['img[itemprop="logo"]', 'src'],
    ], $);
    if (logo) ogObject.ogLogo = logo;
  }

  // url fallback
  if (!ogObject.ogUrl) {
    const url = findFirstAttribute([
      ['link[rel="canonical"]', 'href'],
      ['link[rel="alternate"][hreflang="x-default"]', 'href'],
    ], $);
    if (url) ogObject.ogUrl = url;
  }

  // date fallback
  if (!ogObject.ogDate) {
    const date = findFirstAttribute([
      ['head > meta[name="date"]', 'content'],
      ['[itemprop*="datemodified" i]', 'content'],
      ['[itemprop="datepublished" i]', 'content'],
      ['[itemprop*="date" i]', 'content'],
      ['time[itemprop*="date" i]', 'datetime'],
      ['time[datetime]', 'datetime'],
    ], $);
    if (date) ogObject.ogDate = date;
  }

  return ogObject;
};
// Expose the fallback routine as this module's export (CommonJS).
module.exports = fallback;