UNPKG

@nanggo/social-preview

Version:

Generate beautiful social media preview images from any URL

246 lines (245 loc) 7.93 kB
"use strict"; /** * Metadata Extractor Module * Extracts Open Graph and Twitter Card metadata from URLs */ var __importDefault = (this && this.__importDefault) || function (mod) { return (mod && mod.__esModule) ? mod : { "default": mod }; }; Object.defineProperty(exports, "__esModule", { value: true }); exports.extractMetadata = extractMetadata; exports.fetchImage = fetchImage; exports.validateMetadata = validateMetadata; exports.applyFallbacks = applyFallbacks; const open_graph_scraper_1 = __importDefault(require("open-graph-scraper")); const axios_1 = __importDefault(require("axios")); const types_1 = require("../types"); /** * Extract metadata from a given URL * @param url - The URL to extract metadata from * @returns Extracted metadata object */ async function extractMetadata(url) { try { // Validate URL const validatedUrl = validateUrl(url); // Extract Open Graph data const ogData = await fetchOpenGraphData(validatedUrl); // Parse and normalize metadata const metadata = parseMetadata(ogData, validatedUrl); return metadata; } catch (error) { if (error instanceof types_1.PreviewGeneratorError) { throw error; } throw new types_1.PreviewGeneratorError(types_1.ErrorType.METADATA_ERROR, `Failed to extract metadata from ${url}: ${error instanceof Error ? error.message : String(error)}`, error); } } /** * Validate and normalize URL */ function validateUrl(url) { try { const urlObj = new URL(url); // Ensure protocol is http or https if (!['http:', 'https:'].includes(urlObj.protocol)) { throw new Error('Invalid protocol. Only HTTP and HTTPS are supported.'); } return urlObj.toString(); } catch (error) { throw new types_1.PreviewGeneratorError(types_1.ErrorType.VALIDATION_ERROR, `Invalid URL: ${url}`, error); } } /** * Fetch Open Graph data using open-graph-scraper */ async function fetchOpenGraphData(url) { try { // First, try to fetch HTML content const response = await axios_1.default.get(url, { headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SocialPreviewBot/1.0)', 'Accept': 'text/html,application/xhtml+xml', }, timeout: 10000, maxRedirects: 5, }); // Extract OG data from HTML const { error, result } = await (0, open_graph_scraper_1.default)({ html: response.data, url }); if (error) { throw new Error('Failed to parse Open Graph data'); } return result; } catch (error) { // Fallback: Try direct OG scraping try { const { error: ogError, result } = await (0, open_graph_scraper_1.default)({ url }); if (ogError) { throw new Error('Failed to fetch Open Graph data'); } return result; } catch (fallbackError) { throw new types_1.PreviewGeneratorError(types_1.ErrorType.FETCH_ERROR, `Failed to fetch data from ${url}`, fallbackError); } } } /** * Parse and normalize metadata from Open Graph data */ function parseMetadata(ogData, url) { const urlObj = new URL(url); // Extract title (prioritize OG title, then Twitter, then HTML title) const title = ogData.ogTitle || ogData.twitterTitle || ogData.dcTitle || ogData.title || urlObj.hostname; // Extract description const description = ogData.ogDescription || ogData.twitterDescription || ogData.dcDescription || ogData.description || ''; // Extract image URL (prioritize OG image, then Twitter image) let image; if (ogData.ogImage) { if (Array.isArray(ogData.ogImage)) { image = ogData.ogImage[0]?.url || ogData.ogImage[0]; } else if (typeof ogData.ogImage === 'object') { image = ogData.ogImage.url; } else { image = ogData.ogImage; } } else if (ogData.twitterImage) { if (Array.isArray(ogData.twitterImage)) { image = ogData.twitterImage[0]?.url || ogData.twitterImage[0]; } else if (typeof ogData.twitterImage === 'object') { image = ogData.twitterImage.url; } else { image = ogData.twitterImage; } } // Ensure image URL is absolute if (image && !image.startsWith('http')) { try { const imageUrl = new URL(image, url); image = imageUrl.toString(); } catch { image = undefined; } } // Extract site name const siteName = ogData.ogSiteName || ogData.twitterSite || ogData.applicationName || urlObj.hostname.replace('www.', ''); // Extract favicon let favicon; if (ogData.favicon) { favicon = ogData.favicon; if (favicon && !favicon.startsWith('http')) { try { const faviconUrl = new URL(favicon, url); favicon = faviconUrl.toString(); } catch { // Try default favicon path favicon = `${urlObj.protocol}//${urlObj.hostname}/favicon.ico`; } } } else { // Default favicon path favicon = `${urlObj.protocol}//${urlObj.hostname}/favicon.ico`; } // Extract author const author = ogData.author || ogData.dcCreator || ogData.twitterCreator || ogData.articleAuthor; // Extract published date const publishedDate = ogData.ogArticlePublishedTime || ogData.articlePublishedTime || ogData.dcDate || ogData.publishedTime; // Extract locale const locale = ogData.ogLocale || ogData.inLanguage || 'en_US'; return { title: cleanText(title), description: description ? cleanText(description) : undefined, image, siteName: siteName ? cleanText(siteName) : undefined, favicon, author: author ? cleanText(author) : undefined, publishedDate, url, domain: urlObj.hostname, locale, }; } /** * Clean and normalize text */ function cleanText(text) { return text .replace(/[\n\r]+/g, ' ') .replace(/\s+/g, ' ') .trim(); } /** * Fetch image from URL and return as buffer * @param imageUrl - URL of the image to fetch * @returns Image buffer */ async function fetchImage(imageUrl) { try { const response = await axios_1.default.get(imageUrl, { responseType: 'arraybuffer', headers: { 'User-Agent': 'Mozilla/5.0 (compatible; SocialPreviewBot/1.0)', }, timeout: 15000, maxRedirects: 5, }); return Buffer.from(response.data); } catch (error) { throw new types_1.PreviewGeneratorError(types_1.ErrorType.IMAGE_ERROR, `Failed to fetch image from ${imageUrl}`, error); } } /** * Validate metadata to ensure required fields are present */ function validateMetadata(metadata) { return !!(metadata.title && metadata.title.length > 0); } /** * Apply fallback values to incomplete metadata */ function applyFallbacks(metadata, url) { const urlObj = new URL(url); return { title: metadata.title || urlObj.hostname, description: metadata.description, image: metadata.image, siteName: metadata.siteName || urlObj.hostname.replace('www.', ''), favicon: metadata.favicon || `${urlObj.protocol}//${urlObj.hostname}/favicon.ico`, author: metadata.author, publishedDate: metadata.publishedDate, url: metadata.url || url, domain: metadata.domain || urlObj.hostname, locale: metadata.locale || 'en_US', }; }