UNPKG

open-graph-scraper

Version:

Node.js scraper module for Open Graph and Twitter Card info

225 lines (224 loc) 10.5 kB
"use strict";
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
exports.fallback = fallback;
const chardet_1 = __importDefault(require("chardet"));
const utils_1 = require("./utils");

/** Maximum number of <img> elements reported as fallback images. */
const MAX_FALLBACK_IMAGES = 10;

/**
 * Read an attribute from the first element matched by a selector.
 *
 * @param {object} $ - cheerio.load() of the current html
 * @param {string} selector - CSS selector to query
 * @param {string} attribute - attribute name to read
 * @return {string|undefined} the attribute value, or undefined when nothing
 * matches or the value is missing/empty
 */
const readAttribute = ($, selector, attribute) => {
    const value = $(selector).attr(attribute);
    return value ? value : undefined;
};

/**
 * Return the first non-empty attribute value from an ordered candidate list.
 *
 * @param {object} $ - cheerio.load() of the current html
 * @param {Array<[string, string]>} candidates - [selector, attribute] pairs, highest priority first
 * @return {string|undefined} the first non-empty value found, if any
 */
const firstAttribute = ($, candidates) => {
    for (const [selector, attribute] of candidates) {
        const value = readAttribute($, selector, attribute);
        if (value !== undefined) {
            return value;
        }
    }
    return undefined;
};

/**
 * Return the first non-empty text content from an ordered list of selectors.
 *
 * @param {object} $ - cheerio.load() of the current html
 * @param {string[]} selectors - CSS selectors, highest priority first
 * @return {string|undefined} the first non-empty text found, if any
 */
const firstText = ($, selectors) => {
    for (const selector of selectors) {
        const text = $(selector).text();
        if (text) {
            return text;
        }
    }
    return undefined;
};

/**
 * ogs fallbacks
 *
 * Fills in values that are missing from ogObject by looking at ordinary
 * (non Open Graph) markup. ogObject is mutated in place and also returned.
 *
 * @param {object} ogObject - the current ogObject
 * @param {object} options - options for ogs
 * @param {object} $ - cheerio.load() of the current html
 * @param {string} [body] - raw html text; only used to guess the charset when
 * the markup does not declare one
 * @return {object} object with ogs results with updated fallback values
 *
 */
function fallback(ogObject, options, $, body) {
    // onlyGetOpenGraphInfo: falsy -> every fallback runs, true -> none run,
    // otherwise it is treated as a list of fallback keys to skip.
    const shouldFallback = (key) => {
        const { onlyGetOpenGraphInfo } = options;
        if (!onlyGetOpenGraphInfo) {
            return true;
        }
        if (onlyGetOpenGraphInfo === true) {
            return false;
        }
        return !onlyGetOpenGraphInfo.includes(key);
    };

    // Sets ogObject[property] from the first matching candidate, but only when
    // the property is still empty and the fallback is enabled.
    const fillFromAttributes = (property, fallbackKey, candidates) => {
        if (ogObject[property] || !shouldFallback(fallbackKey)) {
            return;
        }
        const value = firstAttribute($, candidates);
        if (value !== undefined) {
            ogObject[property] = value;
        }
    };

    // title fallback
    if (!ogObject.ogTitle && shouldFallback('title')) {
        // Test the same value that gets assigned: $('title').text() would also
        // include later <title> elements (e.g. inside inline SVG), which could
        // pass the check while the first <title> is actually empty.
        const title = $('title').first().text()
            || readAttribute($, 'head > meta[name="title"]', 'content')
            || firstText($, [
                '.post-title',
                '.entry-title',
                'h1[class*="title" i] a',
                'h1[class*="title" i]',
            ]);
        if (title) {
            ogObject.ogTitle = title;
        }
    }

    // Get meta description tag if og description was not provided
    if (!ogObject.ogDescription && shouldFallback('description')) {
        const description = firstAttribute($, [
            ['head > meta[name="description"]', 'content'],
            ['head > meta[itemprop="description"]', 'content'],
        ]) ?? firstText($, ['#description']);
        if (description) {
            ogObject.ogDescription = description;
        }
    }

    // Get all of images if there is no og:image info
    if (!ogObject.ogImage && shouldFallback('image')) {
        const validatorSettings = options.urlValidatorSettings ?? utils_1.defaultUrlValidatorSettings;
        const images = [];
        for (const imageElement of $('img').toArray()) {
            // Only the first MAX_FALLBACK_IMAGES valid images are reported.
            if (images.length >= MAX_FALLBACK_IMAGES) {
                break;
            }
            const $image = $(imageElement);
            const source = $image.attr('src') ?? '';
            if (!source) {
                continue;
            }
            const type = (0, utils_1.findImageTypeFromUrl)(source);
            if (!(0, utils_1.isUrlValid)(source, validatorSettings) || !(0, utils_1.isImageTypeValid)(type)) {
                continue;
            }
            const fallbackImage = { url: source, type };
            // Missing, non-numeric and zero dimensions are all left out.
            const width = Number($image.attr('width'));
            if (width) {
                fallbackImage.width = width;
            }
            const height = Number($image.attr('height'));
            if (height) {
                fallbackImage.height = height;
            }
            images.push(fallbackImage);
        }
        if (images.length > 0) {
            ogObject.ogImage = images;
        }
        else {
            delete ogObject.ogImage;
        }
    }
    else if (ogObject.ogImage) {
        // Existing images: fill in a missing type when it can be derived from the url.
        ogObject.ogImage.forEach((image) => {
            if (image.url && !image.type) {
                const type = (0, utils_1.findImageTypeFromUrl)(image.url);
                if ((0, utils_1.isImageTypeValid)(type)) {
                    image.type = type;
                }
            }
        });
    }

    // audio fallback
    if (!ogObject.ogAudioURL && !ogObject.ogAudioSecureURL && shouldFallback('audioUrl')) {
        // Prefer <audio src>, then <audio><source src>; the type is read from
        // the same element that supplied the url.
        for (const selector of ['audio', 'audio > source']) {
            const source = readAttribute($, selector, 'src');
            if (source === undefined) {
                continue;
            }
            if (source.startsWith('https')) {
                ogObject.ogAudioSecureURL = source;
            }
            else {
                ogObject.ogAudioURL = source;
            }
            const type = readAttribute($, selector, 'type');
            if (!ogObject.ogAudioType && type !== undefined) {
                ogObject.ogAudioType = type;
            }
            break;
        }
    }

    // locale fallback
    fillFromAttributes('ogLocale', 'locale', [
        ['html', 'lang'],
        ['head > meta[itemprop="inLanguage"]', 'content'],
    ]);

    // logo fallback
    fillFromAttributes('ogLogo', 'logo', [
        ['meta[itemprop="logo"]', 'content'],
        ['img[itemprop="logo"]', 'src'],
    ]);

    // url fallback
    fillFromAttributes('ogUrl', 'url', [
        ['link[rel="canonical"]', 'href'],
        ['link[rel="alternate"][hreflang="x-default"]', 'href'],
    ]);

    // date fallback
    fillFromAttributes('ogDate', 'date', [
        ['head > meta[name="date"]', 'content'],
        ['[itemprop*="datemodified" i]', 'content'],
        ['[itemprop="datepublished" i]', 'content'],
        ['[itemprop*="date" i]', 'content'],
        ['time[itemprop*="date" i]', 'datetime'],
        ['time[datetime]', 'datetime'],
    ]);

    // favicon fallback
    fillFromAttributes('favicon', 'favicon', [
        ['link[rel="shortcut icon"]', 'href'],
        ['link[rel="icon"]', 'href'],
        ['link[rel="mask-icon"]', 'href'],
        ['link[rel="apple-touch-icon"]', 'href'],
        ['link[type="image/png"]', 'href'],
        ['link[type="image/ico"]', 'href'],
        ['link[type="image/x-icon"]', 'href'],
        ['head > meta[property*="appIcon"]', 'content'],
    ]);

    // set the charset
    // Select meta[charset] rather than meta: .attr() only reads the first
    // matched element, so a plain 'meta' selector misses the declaration
    // whenever another <meta> tag comes first in the document.
    const declaredCharset = firstAttribute($, [
        ['meta[charset]', 'charset'],
        ['head > meta[name="charset"]', 'content'],
    ]);
    if (declaredCharset !== undefined) {
        ogObject.charset = declaredCharset;
    }
    else {
        const contentType = readAttribute($, 'head > meta[http-equiv="content-type"]', 'content');
        if (contentType !== undefined) {
            // A content-type meta without a charset leaves ogObject.charset
            // untouched; detection from the body is not attempted in that case.
            const charsetMatch = /charset=([^()<>@,;:"/[\]?.=\s]*)/i.exec(contentType);
            if (charsetMatch?.[1]) {
                ogObject.charset = charsetMatch[1];
            }
        }
        else if (body) {
            // Last resort: let chardet guess from the body text.
            const bodyBytes = new TextEncoder().encode(body);
            ogObject.charset = chardet_1.default.detect(bodyBytes) ?? '';
        }
    }

    return ogObject;
}
exports.default = fallback;