UNPKG

link-previu

Version:

A Node.js library for getting link previews

448 lines (439 loc) 13.4 kB
// src/index.ts
import axios from "axios";
import NodeCache from "node-cache";
import * as cheerio from "cheerio";

// src/helpers.ts
import urlRegexSafe from "url-regex-safe";

// Hoisted so the pattern is compiled once. It has no `g`/`y` flag, so sharing it is stateless.
const YOUTUBE_VIDEO_ID_PATTERN = /^(?:https?:\/\/)?(?:www\.)?(?:m\.)?(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?(?:.*&)?v=|shorts\/|live\/))([^"&?\/\s]{11})/;

/**
 * Checks whether a value is a usable URL: it must be accepted by the `URL`
 * constructor and match `url-regex-safe` in exact mode.
 * @param url The candidate URL string
 * @returns `true` when both checks pass, otherwise `false`
 */
function isValidUrl(url) {
  if (!url) return false;
  try {
    new URL(url);
    return urlRegexSafe({ exact: true }).test(url);
  } catch {
    return false;
  }
}

/**
 * Extracts the 11-character video id from a YouTube URL
 * (youtu.be, embed, v, watch?v=, shorts and live forms).
 * @param url The URL to inspect
 * @returns The video id, or `null` when the URL does not match
 */
function getVideoIdFromYoutubeUrl(url) {
  const match = url.match(YOUTUBE_VIDEO_ID_PATTERN);
  return match ? match[1] : null;
}

/**
 * Tells whether title, desc, image and url are all present (truthy).
 * @param data The metadata collected so far
 * @returns `true` when nothing is missing
 */
function hasAllMetadata(data) {
  return !!(data.title && data.desc && data.image && data.url);
}

/**
 * Merge rule used when combining parser results: keep the current value if it
 * is non-empty, otherwise take the candidate if it is non-empty. When both are
 * empty, the first defined one is kept, so an empty string is only replaced by
 * an actual value and never by `undefined`.
 * @param current The value already collected
 * @param candidate The value offered by the next source
 * @returns The preferred value
 */
function preferNonEmpty(current, candidate) {
  if (current) return current;
  if (candidate) return candidate;
  return current ?? candidate;
}

/**
 * Resolves `target` against `base` without throwing.
 * @param target An absolute or relative URL
 * @param base The base URL used for relative targets
 * @returns The absolute URL string, or `undefined` when it cannot be built
 */
function resolveUrl(target, base) {
  try {
    return new URL(target, base).toString();
  } catch {
    return void 0;
  }
}

/**
 * Picks the object to read fields from out of parsed JSON: the first element
 * when the payload is an array, otherwise the payload itself.
 * @param data Parsed JSON (may be null, a primitive, an object or an array)
 * @returns The object to read from, or `undefined` when there is none
 *          (including a `null` first element, which would otherwise throw on access)
 */
function firstJsonEntity(data) {
  const entity = Array.isArray(data) && data.length > 0 ? data[0] : data;
  return entity !== null && typeof entity === "object" ? entity : void 0;
}

/**
 * Normalizes an image value to a string: a string is returned as is, an
 * object yields its `url` or `contentUrl`, and an array is represented by its
 * first element.
 * @param result The raw image value
 * @returns The image string, or `undefined`
 */
function imageResultToString(result) {
  const candidate = Array.isArray(result) && result.length > 0 ? result[0] : result;
  if (typeof candidate === "string") {
    return candidate;
  }
  if (candidate && typeof candidate === "object") {
    return candidate["url"] || candidate["contentUrl"];
  }
  return void 0;
}

// src/parsers/html.parser.ts
/** Reads metadata from plain HTML: <title>, description/site_name meta tags and the first <img> in the body. */
class HtmlParser {
  /** @param document The cheerio document to query */
  constructor(document) {
    this.document = document;
  }

  /** @returns An object with `title`, `desc`, `image` and `siteName` */
  parse() {
    return {
      title: this.getTitle(),
      desc: this.getDescription(),
      image: this.getImage(),
      siteName: this.getSiteName()
    };
  }

  /** @returns The text of `head title` (an empty string when there is none) */
  getTitle() {
    return this.document("head title").text();
  }

  getDescription() {
    return this.document("head meta[name='description']").attr("content") || this.document("head meta[property='description']").attr("content");
  }

  /** @returns The `src` of the first image in the body, if any */
  getImage() {
    return this.document("body img").first().attr("src");
  }

  getSiteName() {
    return this.document("head meta[name='site_name']").attr("content") || this.document("head meta[property='site_name']").attr("content");
  }
}

// src/parsers/open-graph.parser.ts
/** Reads Open Graph (`og:*`) meta tags, accepting both `property` and `name` attributes. */
class OpenGraphParser {
  /** @param document The cheerio document to query */
  constructor(document) {
    this.document = document;
  }

  /** @returns An object with `title`, `desc`, `image`, `url` and `siteName` */
  parse() {
    return {
      title: this.getMetaProperty("og:title"),
      desc: this.getMetaProperty("og:description"),
      image: this.getMetaProperty("og:image"),
      url: this.getMetaProperty("og:url"),
      siteName: this.getMetaProperty("og:site_name")
    };
  }

  /**
   * @param property The meta key, e.g. `og:title`
   * @returns The tag's `content`, looked up by `property` first and `name` second
   */
  getMetaProperty(property) {
    return this.document(`meta[property='${property}']`).attr("content") || this.document(`meta[name='${property}']`).attr("content");
  }
}

// src/parsers/json-ld.parser.ts
/** Reads metadata from the JSON-LD script returned by the `application/ld+json` selector. */
class JsonLdParser {
  jsonData;

  /** @param document The cheerio document to query */
  constructor(document) {
    this.document = document;
    this.jsonData = this.parseToJson(document);
  }

  /** @returns An object with `title`, `desc`, `image` and `siteName` (the latter taken from Open Graph) */
  parse() {
    return {
      title: this.getTitle(),
      desc: this.getDescription(),
      image: this.getImage(),
      siteName: new OpenGraphParser(this.document).parse().siteName
    };
  }

  /**
   * @param document The cheerio document to query
   * @returns The parsed JSON-LD payload, or `null` when it is absent or not valid JSON
   */
  parseToJson(document) {
    const data = document("script[type='application/ld+json']").html();
    if (!data) return null;
    try {
      const cleanedData = data.replace(/\n/g, " ").trim();
      return JSON.parse(cleanedData);
    } catch (error) {
      // Malformed JSON-LD is treated as "no JSON-LD" so the other parsers can still contribute.
      return null;
    }
  }

  /** @returns `name`, falling back to `headline` */
  getTitle() {
    const entity = firstJsonEntity(this.jsonData);
    return entity ? entity["name"] || entity["headline"] : void 0;
  }

  /** @returns `description`, falling back to `headline` */
  getDescription() {
    const entity = firstJsonEntity(this.jsonData);
    return entity ? entity["description"] || entity["headline"] : void 0;
  }

  /** @returns `logo`, falling back to `image`, normalized to a string */
  getImage() {
    const entity = firstJsonEntity(this.jsonData);
    return entity ? imageResultToString(entity["logo"] || entity["image"]) : void 0;
  }

  /** Kept as a method for compatibility; delegates to the shared helper. */
  imageResultToString(result) {
    return imageResultToString(result);
  }
}

// src/parsers/other.parser.ts
/** Reads generic `<meta name="...">` tags: title, description, image, site_name and url. */
class OtherParser {
  /** @param document The cheerio document to query */
  constructor(document) {
    this.document = document;
  }

  /** @returns An object with `title`, `desc`, `image`, `siteName` and `url` */
  parse() {
    return {
      title: this.getTitle(),
      desc: this.getDescription(),
      image: this.getImage(),
      siteName: this.getSiteName(),
      url: this.getUrl()
    };
  }

  getTitle() {
    return this.getMetaProperty("name", "title");
  }

  getDescription() {
    return this.getMetaProperty("name", "description");
  }

  getImage() {
    return this.getMetaProperty("name", "image");
  }

  getSiteName() {
    return this.getMetaProperty("name", "site_name");
  }

  getUrl() {
    return this.getMetaProperty("name", "url");
  }

  /**
   * @param attribute The attribute to match on (`name` or `property`)
   * @param property The attribute value to look for
   * @returns The `content` of the matching meta tag, if any
   */
  getMetaProperty(attribute, property) {
    return this.document(`meta[${attribute}='${property}']`).attr("content");
  }
}

// src/parsers/twitter.parser.ts
/** Reads `twitter:*` card meta tags, filling gaps from Open Graph. */
class TwitterParser {
  /** @param document The cheerio document to query */
  constructor(document) {
    this.document = document;
  }

  /** @returns An object with `title`, `desc`, `image`, `siteName` and `url` */
  parse() {
    const og = new OpenGraphParser(this.document).parse();
    const metadata = {
      title: this.getTitle() || og.title,
      desc: this.getDescription() || og.desc,
      image: this.getImage() || og.image,
      siteName: og.siteName,
      url: og.url
    };
    if (!metadata.title && og.siteName?.includes("Twitter")) {
      metadata.title = "Twitter Post";
    }
    // SVG images are dropped unless they come from a twimg.com/media path.
    if (metadata.image?.endsWith(".svg") && !metadata.image.includes("twimg.com/media")) {
      metadata.image = void 0;
    }
    return metadata;
  }

  getTitle() {
    return this.getMetaProperty("name", "twitter:title") || this.getMetaProperty("property", "twitter:title") || this.document('meta[name="title"]').attr("content");
  }

  getDescription() {
    return this.getMetaProperty("name", "twitter:description") || this.getMetaProperty("property", "twitter:description") || this.document('meta[name="description"]').attr("content");
  }

  /** @returns The twitter image, or `undefined` for an SVG whose URL does not contain "media" */
  getImage() {
    const image = this.getMetaProperty("name", "twitter:image") || this.getMetaProperty("property", "twitter:image") || this.getMetaProperty("name", "twitter:image:src") || this.getMetaProperty("property", "twitter:image:src");
    if (image?.endsWith(".svg") && !image.includes("media")) {
      return void 0;
    }
    return image;
  }

  /**
   * @param attribute The attribute to match on (`name` or `property`)
   * @param property The attribute value to look for
   * @returns The `content` of the matching meta tag, if any
   */
  getMetaProperty(attribute, property) {
    return this.document(`meta[${attribute}='${property}']`).attr("content");
  }
}

// src/parsers/youtube.parser.ts
/**
 * Reads `title`, `thumbnail_url`, `provider_name` and `provider_url` from a
 * document whose whole body is a JSON payload.
 * NOTE(review): these field names look like an oEmbed response — confirm against the caller.
 */
class YoutubeParser {
  jsonData;

  /** @param document The cheerio document to query */
  constructor(document) {
    this.document = document;
    this.jsonData = this.parseToJson(document);
  }

  /** @returns An object with `title`, `image`, `siteName` and `url` */
  parse() {
    return {
      title: this.getTitle(),
      image: this.getImage(),
      siteName: this.getSiteName(),
      url: this.getUrl()
    };
  }

  /**
   * @param document The cheerio document to query
   * @returns The parsed JSON, or `null` when the document is not a bare JSON payload
   */
  parseToJson(document) {
    try {
      const html = document.html() || "";
      // Strip the empty HTML skeleton so only the JSON text remains.
      const cleanedData = html.replace("<html><head></head><body>", "").replace("</body></html>", "").replace(/\n/g, " ").trim();
      return JSON.parse(cleanedData);
    } catch (error) {
      // Regular HTML pages are expected here; they simply yield no data.
      return null;
    }
  }

  getTitle() {
    return firstJsonEntity(this.jsonData)?.["title"];
  }

  getImage() {
    const entity = firstJsonEntity(this.jsonData);
    return entity ? imageResultToString(entity["thumbnail_url"]) : void 0;
  }

  getSiteName() {
    return firstJsonEntity(this.jsonData)?.["provider_name"];
  }

  getUrl() {
    return firstJsonEntity(this.jsonData)?.["provider_url"];
  }

  /** Kept as a method for compatibility; delegates to the shared helper. */
  imageResultToString(result) {
    return imageResultToString(result);
  }
}

// src/index.ts
class LinkPreview {
  redis;
  nodeCache;
  cacheMaxAge;
  cacheEnabled;
  requestTimeout;
  maxRedirects;
  httpHeaders = {
    "User-Agent": "WhatsApp/2.23.4.79 A" // default option
  };

  /**
   * Creates a new LinkPreview instance
   * @param options Configuration options for link preview fetching and caching.
   *   May be omitted. Defaults: `requestTimeout` 5000, `maxRedirects` 5,
   *   `cacheMaxAge` 0 (caching is enabled only for a truthy `cacheMaxAge`).
   *   `httpHeaders` are merged over the default headers. When caching is
   *   enabled, `redis` is used if provided, otherwise an in-memory NodeCache.
   */
  constructor(options = {}) {
    const opts = options ?? {};
    this.requestTimeout = opts.requestTimeout ?? 5e3;
    this.cacheMaxAge = opts.cacheMaxAge ?? 0;
    this.cacheEnabled = Boolean(opts.cacheMaxAge);
    this.maxRedirects = opts.maxRedirects ?? 5;
    this.httpHeaders = { ...this.httpHeaders, ...opts.httpHeaders ?? {} };
    if (this.cacheEnabled) {
      this.redis = opts.redis;
      this.nodeCache = opts.redis ? void 0 : new NodeCache();
    }
  }

  /**
   * Fetches preview data for a given URL
   * @param url The URL to fetch preview data for
   * @returns Promise containing the preview data, or `null` when the URL is
   *   invalid. For an `image/*` response the URL itself is returned as the image.
   *   The promise rejects when the HTTP request or the cache client fails.
   */
  async getLinkPreview(url) {
    if (!isValidUrl(url)) {
      return null;
    }
    const cacheKey = `link-preview-node:${url}`;
    if (this.cacheEnabled) {
      if (this.redis) {
        const cachedData = await this.redis.get(cacheKey);
        if (cachedData) {
          return JSON.parse(cachedData);
        }
      } else {
        const cachedData = this.nodeCache?.get(cacheKey);
        if (cachedData) {
          return cachedData;
        }
      }
    }

    const videoId = getVideoIdFromYoutubeUrl(url);
    const response = videoId ? await this.getYoutubeData(videoId) : await this.fetchWithRedirects(url);
    const contentType = response.headers["content-type"];
    if (contentType?.startsWith("image/")) {
      // A direct image link has no HTML to parse; the link is its own preview image.
      return { title: "", desc: "", siteName: "", image: url, url };
    }

    const document = this.responseToDocument(response.data);
    if (!document) {
      return null;
    }
    const metadata = this.extractMetadata(document, url);
    if (this.cacheEnabled) {
      if (this.redis) {
        // NOTE(review): argument shape presumably matches an ioredis-style `set(key, value, "EX", ttl)` — confirm.
        await this.redis.set(cacheKey, JSON.stringify(metadata), "EX", this.cacheMaxAge);
      } else {
        this.nodeCache?.set(cacheKey, metadata, this.cacheMaxAge);
      }
    }
    return metadata;
  }

  /**
   * Extracts metadata from a given document by consulting the parsers in
   * priority order (Open Graph, Twitter, YouTube, JSON-LD, HTML, other meta
   * tags) until title, desc, image and url are all known.
   * @param document The document to extract metadata from
   * @param url The URL of the document; used when the page declares no URL of
   *   its own and as a fallback base for resolving the image
   * @returns The extracted metadata
   */
  extractMetadata(document, url) {
    const output = {};
    // Parsers are instantiated lazily so later ones are skipped once everything is found.
    const parserClasses = [OpenGraphParser, TwitterParser, YoutubeParser, JsonLdParser, HtmlParser, OtherParser];
    for (const Parser of parserClasses) {
      const parsed = new Parser(document).parse();
      // Empty strings count as missing, so a later parser can still supply a real value.
      output.title = preferNonEmpty(output.title, parsed.title);
      output.desc = preferNonEmpty(output.desc, parsed.desc);
      output.image = preferNonEmpty(output.image, parsed.image);
      output.siteName = preferNonEmpty(output.siteName, parsed.siteName);
      output.url = preferNonEmpty(preferNonEmpty(output.url, parsed.url), url);
      if (hasAllMetadata(output)) break;
    }

    if (output.image) {
      // Prefer the page-declared URL as base, but do not lose the image if that URL is unusable.
      const bases = [output.url, url].filter(Boolean);
      if (bases.length > 0) {
        let resolved;
        for (const base of bases) {
          resolved = resolveUrl(output.image, base);
          if (resolved) break;
        }
        output.image = resolved;
      }
    }
    return output;
  }

  /**
   * Converts an HTML string to a cheerio document
   * @param html The HTML string to convert
   * @returns The cheerio document; any error raised by `cheerio.load` propagates to the caller
   */
  responseToDocument(html) {
    return cheerio.load(html);
  }

  /**
   * Fetches data from a given URL with redirects
   * @param url The URL to fetch data from
   * @returns The axios response; the promise rejects with the axios error on failure
   */
  async fetchWithRedirects(url) {
    return axios.get(url, {
      headers: { ...this.httpHeaders },
      maxRedirects: this.maxRedirects,
      timeout: this.requestTimeout
    });
  }

  /**
   * Fetches data from a given YouTube video ID
   * @param videoId The YouTube video ID to fetch data from
   * @returns The axios response for the video's watch page
   */
  async getYoutubeData(videoId) {
    const url = `https://www.youtube.com/watch?v=${videoId}`;
    return this.fetchWithRedirects(url);
  }
}

export { LinkPreview as default };