link-previu
Version:
A Node.js library for getting link previews
448 lines (439 loc) • 13.4 kB
JavaScript
// src/index.ts
import axios from "axios";
import NodeCache from "node-cache";
import * as cheerio from "cheerio";
// src/helpers.ts
import urlRegexSafe from "url-regex-safe";
/**
 * Decides whether a value is an acceptable URL for preview fetching.
 *
 * A value passes only when it is truthy, is accepted by the `URL`
 * constructor, and matches the `url-regex-safe` pattern in exact mode.
 *
 * @param {string} url Candidate URL.
 * @returns {boolean} True when every check passes; false otherwise, including
 *   when any of the checks throws.
 */
function isValidUrl(url) {
  if (!url) {
    return false;
  }
  let accepted;
  try {
    // The constructor is used purely as a parser: it throws on malformed input.
    new URL(url);
    accepted = urlRegexSafe({ exact: true }).test(url);
  } catch {
    accepted = false;
  }
  return accepted;
}
/**
 * Pulls the 11-character video id out of a YouTube URL.
 *
 * Shapes recognised by the pattern: `youtu.be/<id>` and `youtube.com` paths
 * `embed/`, `v/`, `watch?...v=`, `shorts/` and `live/`, with an optional
 * scheme and optional `www.` / `m.` host prefixes.
 *
 * @param {string} url Candidate URL.
 * @returns {string|null} The captured id, or null when the pattern does not match.
 */
function getVideoIdFromYoutubeUrl(url) {
  const youtubePattern = /^(?:https?:\/\/)?(?:www\.)?(?:m\.)?(?:youtu\.be\/|youtube\.com\/(?:embed\/|v\/|watch\?(?:.*&)?v=|shorts\/|live\/))([^"&?\/\s]{11})/;
  const found = url.match(youtubePattern);
  if (!found) {
    return null;
  }
  return found[1];
}
/**
 * Reports whether a metadata object has every field needed to stop searching.
 *
 * @param {{title?: string, desc?: string, image?: string, url?: string}} data
 *   Metadata collected so far.
 * @returns {boolean} True only when `title`, `desc`, `image` and `url` are all truthy.
 */
function hasAllMetadata(data) {
  const requiredFields = ["title", "desc", "image", "url"];
  return requiredFields.every((field) => Boolean(data[field]));
}
// src/parsers/html.parser.ts
/**
 * Reads preview metadata from plain HTML elements (no Open Graph / Twitter tags).
 *
 * `document` is called as a function with a CSS selector and the returned
 * object is expected to expose `text()`, `attr()` and `first()`; LinkPreview
 * passes the value returned by `cheerio.load`.
 */
var HtmlParser = class {
  /**
   * @param document Selector function for the page being inspected.
   */
  constructor(document) {
    this.document = document;
  }
  /**
   * Collects every field this parser knows about.
   * @returns {{title: string, desc: (string|undefined), image: (string|undefined), siteName: (string|undefined)}}
   */
  parse() {
    return {
      title: this.getTitle(),
      desc: this.getDescription(),
      image: this.getImage(),
      siteName: this.getSiteName()
    };
  }
  /** @returns {string} Text of the `<title>` element inside `<head>`. */
  getTitle() {
    const titleElement = this.document("head title");
    return titleElement.text();
  }
  /**
   * @returns {string|undefined} `content` of the description meta tag, looked
   *   up by `name` first and by `property` when the first lookup is falsy.
   */
  getDescription() {
    const byName = this.document("head meta[name='description']").attr("content");
    if (byName) {
      return byName;
    }
    return this.document("head meta[property='description']").attr("content");
  }
  /** @returns {string|undefined} `src` of the first `<img>` found in `<body>`. */
  getImage() {
    const firstImage = this.document("body img").first();
    return firstImage.attr("src");
  }
  /**
   * @returns {string|undefined} `content` of the site_name meta tag, looked up
   *   by `name` first and by `property` when the first lookup is falsy.
   */
  getSiteName() {
    const byName = this.document("head meta[name='site_name']").attr("content");
    if (byName) {
      return byName;
    }
    return this.document("head meta[property='site_name']").attr("content");
  }
};
// src/parsers/open-graph.parser.ts
/**
 * Reads Open Graph (`og:*`) meta tags from a document.
 *
 * `document` is called as a function with a CSS selector and the returned
 * object is expected to expose `attr()`.
 */
var OpenGraphParser = class {
  /**
   * @param document Selector function for the page being inspected.
   */
  constructor(document) {
    this.document = document;
  }
  /**
   * Looks up each supported `og:*` tag and maps it to a metadata field.
   * @returns {{title: *, desc: *, image: *, url: *, siteName: *}} Every key is
   *   always present; a value is undefined when the tag was not found.
   */
  parse() {
    const fieldToTag = [
      ["title", "og:title"],
      ["desc", "og:description"],
      ["image", "og:image"],
      ["url", "og:url"],
      ["siteName", "og:site_name"]
    ];
    const metadata = {};
    for (const [field, tag] of fieldToTag) {
      metadata[field] = this.getMetaProperty(tag);
    }
    return metadata;
  }
  /**
   * Reads the `content` attribute of a meta tag, matching on the `property`
   * attribute first and on `name` when that lookup yields a falsy value.
   * @param {string} property Tag identifier, e.g. "og:title".
   * @returns {string|undefined}
   */
  getMetaProperty(property) {
    const viaProperty = this.document(`meta[property='${property}']`).attr("content");
    if (viaProperty) {
      return viaProperty;
    }
    return this.document(`meta[name='${property}']`).attr("content");
  }
};
// src/parsers/json-ld.parser.ts
/**
 * Reads preview metadata from a JSON-LD `<script type="application/ld+json">`
 * block.
 *
 * `document` is called as a function with a CSS selector and the returned
 * object is expected to expose `html()`. The JSON is parsed once, in the
 * constructor, and kept on `jsonData` (null when absent or unparsable).
 */
var JsonLdParser = class {
  jsonData;
  /**
   * @param document Selector function for the page being inspected.
   */
  constructor(document) {
    this.document = document;
    this.jsonData = this.parseToJson(document);
  }
  /**
   * Collects title, description and image from the JSON-LD payload; the site
   * name is taken from the document's Open Graph tags.
   * @returns {{title: *, desc: *, image: *, siteName: *}}
   */
  parse() {
    const title = this.getTitle();
    const desc = this.getDescription();
    const image = this.getImage();
    const { siteName } = new OpenGraphParser(this.document).parse();
    return { title, desc, image, siteName };
  }
  /**
   * Extracts and parses the JSON-LD script content.
   * @param document Selector function for the page being inspected.
   * @returns {*} The parsed JSON value, or null when the script content is
   *   falsy or parsing fails.
   */
  parseToJson(document) {
    const rawScript = document("script[type='application/ld+json']").html();
    if (!rawScript) {
      return null;
    }
    try {
      // Newlines are flattened to spaces before parsing.
      return JSON.parse(rawScript.replace(/\n/g, " ").trim());
    } catch {
      return null;
    }
  }
  /** @returns {*} `name`, falling back to `headline`, of the selected entry. */
  getTitle() {
    return this.#readEntry((entry) => entry["name"] || entry["headline"]);
  }
  /** @returns {*} `description`, falling back to `headline`, of the selected entry. */
  getDescription() {
    return this.#readEntry((entry) => entry["description"] || entry["headline"]);
  }
  /** @returns {string|undefined} `logo`, falling back to `image`, reduced to a string. */
  getImage() {
    return this.#readEntry((entry) => this.imageResultToString(entry["logo"] || entry["image"]));
  }
  /**
   * Reduces a JSON-LD image value to a single string.
   * @param {*} result A string, an object carrying `url`/`contentUrl`, or an
   *   array whose first element is one of those.
   * @returns {string|undefined}
   */
  imageResultToString(result) {
    const candidate = Array.isArray(result) && result.length > 0 ? result[0] : result;
    switch (typeof candidate) {
      case "string":
        return candidate;
      case "object":
        // typeof null is "object", hence the truthiness guard.
        return candidate ? candidate["url"] || candidate["contentUrl"] : void 0;
      default:
        return void 0;
    }
  }
  /**
   * Applies `reader` to the entry the getters read from: the first element of
   * a non-empty array payload, otherwise the payload itself when it is an
   * object. Returns undefined when there is no usable payload.
   */
  #readEntry(reader) {
    const payload = this.jsonData;
    if (!payload) {
      return void 0;
    }
    if (Array.isArray(payload) && payload.length > 0) {
      return reader(payload[0]);
    }
    if (typeof payload === "object") {
      return reader(payload);
    }
    return void 0;
  }
};
// src/parsers/other.parser.ts
/**
 * Last-resort parser that reads generic `<meta name="...">` tags
 * (`title`, `description`, `image`, `site_name`, `url`).
 *
 * `document` is called as a function with a CSS selector and the returned
 * object is expected to expose `attr()`.
 */
var OtherParser = class {
  /**
   * @param document Selector function for the page being inspected.
   */
  constructor(document) {
    this.document = document;
  }
  /**
   * Collects every field this parser knows about.
   * @returns {{title: *, desc: *, image: *, siteName: *, url: *}} A value is
   *   undefined when the corresponding tag was not found.
   */
  parse() {
    return {
      title: this.getTitle(),
      desc: this.getDescription(),
      image: this.getImage(),
      siteName: this.getSiteName(),
      url: this.getUrl()
    };
  }
  /** @returns {string|undefined} Content of `<meta name="title">`. */
  getTitle() {
    return this.getMetaProperty("name", "title");
  }
  /** @returns {string|undefined} Content of `<meta name="description">`. */
  getDescription() {
    return this.getMetaProperty("name", "description");
  }
  /** @returns {string|undefined} Content of `<meta name="image">`. */
  getImage() {
    return this.getMetaProperty("name", "image");
  }
  /** @returns {string|undefined} Content of `<meta name="site_name">`. */
  getSiteName() {
    return this.getMetaProperty("name", "site_name");
  }
  /** @returns {string|undefined} Content of `<meta name="url">`. */
  getUrl() {
    return this.getMetaProperty("name", "url");
  }
  /**
   * Reads the `content` attribute of the meta tag whose `attribute` equals `property`.
   * @param {string} attribute Attribute to match on (e.g. "name").
   * @param {string} property Expected attribute value.
   * @returns {string|undefined}
   */
  getMetaProperty(attribute, property) {
    const selector = `meta[${attribute}='${property}']`;
    const tag = this.document(selector);
    return tag.attr("content");
  }
};
// src/parsers/twitter.parser.ts
/**
 * Reads Twitter card (`twitter:*`) meta tags, using the document's Open Graph
 * tags to fill whatever the Twitter tags leave empty.
 *
 * `document` is called as a function with a CSS selector and the returned
 * object is expected to expose `attr()`.
 */
var TwitterParser = class {
  /**
   * @param document Selector function for the page being inspected.
   */
  constructor(document) {
    this.document = document;
  }
  /**
   * Builds the metadata object.
   *
   * Title, description and image come from the Twitter tags with Open Graph
   * as fallback; site name and url always come from Open Graph.
   * @returns {{title: *, desc: *, image: *, siteName: *, url: *}}
   */
  parse() {
    const og = new OpenGraphParser(this.document).parse();
    const metadata = {
      title: this.getTitle() || og.title,
      desc: this.getDescription() || og.desc,
      image: this.getImage() || og.image,
      siteName: og.siteName,
      url: og.url
    };
    const isTwitterSite = og.siteName?.includes("Twitter");
    if (!metadata.title && isTwitterSite) {
      // Untitled pages whose og:site_name mentions Twitter get a placeholder title.
      metadata.title = "Twitter Post";
    }
    // An .svg image is discarded unless its URL contains "twimg.com/media".
    const isSvg = metadata.image?.endsWith(".svg");
    if (isSvg && !metadata.image.includes("twimg.com/media")) {
      metadata.image = void 0;
    }
    return metadata;
  }
  /** @returns {string|undefined} `twitter:title`, else `<meta name="title">`. */
  getTitle() {
    return this.#twitterMeta("twitter:title") || this.document('meta[name="title"]').attr("content");
  }
  /** @returns {string|undefined} `twitter:description`, else `<meta name="description">`. */
  getDescription() {
    return this.#twitterMeta("twitter:description") || this.document('meta[name="description"]').attr("content");
  }
  /**
   * @returns {string|undefined} `twitter:image`, else `twitter:image:src`;
   *   undefined when the value ends in ".svg" and does not contain "media".
   */
  getImage() {
    const image = this.#twitterMeta("twitter:image") || this.#twitterMeta("twitter:image:src");
    const rejectSvg = image?.endsWith(".svg") && !image.includes("media");
    return rejectSvg ? void 0 : image;
  }
  /**
   * Reads the `content` attribute of the meta tag whose `attribute` equals `property`.
   * @param {string} attribute Attribute to match on ("name" or "property").
   * @param {string} property Expected attribute value.
   * @returns {string|undefined}
   */
  getMetaProperty(attribute, property) {
    const selector = `meta[${attribute}='${property}']`;
    return this.document(selector).attr("content");
  }
  /** Looks a tag up by `name` first, then by `property`. */
  #twitterMeta(key) {
    return this.getMetaProperty("name", key) || this.getMetaProperty("property", key);
  }
};
// src/parsers/youtube.parser.ts
/**
 * Reads preview metadata from a response body that is JSON rather than HTML.
 *
 * The whole serialised document (`document.html()`) is parsed as JSON after
 * stripping an empty html/head/body wrapper. The fields read are `title`,
 * `thumbnail_url`, `provider_name` and `provider_url`.
 * NOTE(review): those field names look like an oEmbed response — confirm
 * against whatever endpoint actually feeds this parser.
 */
var YoutubeParser = class {
  jsonData;
  /**
   * @param document Object exposing `html()` that returns the serialised page.
   */
  constructor(document) {
    this.document = document;
    this.jsonData = this.parseToJson(document);
  }
  /**
   * Collects every field this parser knows about.
   * @returns {{title: *, image: *, siteName: *, url: *}} All values are
   *   undefined when the body was not parsable JSON.
   */
  parse() {
    return {
      title: this.getTitle(),
      image: this.getImage(),
      siteName: this.getSiteName(),
      url: this.getUrl()
    };
  }
  /**
   * Parses the serialised document as JSON.
   * @param document Object exposing `html()`.
   * @returns {*} The parsed value, or null when anything in the process throws.
   */
  parseToJson(document) {
    try {
      const serialised = document.html() || "";
      const withoutWrapper = serialised
        .replace("<html><head></head><body>", "")
        .replace("</body></html>", "");
      return JSON.parse(withoutWrapper.replace(/\n/g, " ").trim());
    } catch {
      return null;
    }
  }
  /** @returns {*} The `title` field of the selected entry. */
  getTitle() {
    return this.#readEntry((entry) => entry["title"]);
  }
  /** @returns {string|undefined} The `thumbnail_url` field reduced to a string. */
  getImage() {
    return this.#readEntry((entry) => this.imageResultToString(entry["thumbnail_url"]));
  }
  /** @returns {*} The `provider_name` field of the selected entry. */
  getSiteName() {
    return this.#readEntry((entry) => entry["provider_name"]);
  }
  /** @returns {*} The `provider_url` field of the selected entry. */
  getUrl() {
    return this.#readEntry((entry) => entry["provider_url"]);
  }
  /**
   * Reduces an image value to a single string.
   * @param {*} result A string, an object carrying `url`/`contentUrl`, or an
   *   array whose first element is one of those.
   * @returns {string|undefined}
   */
  imageResultToString(result) {
    const candidate = Array.isArray(result) && result.length > 0 ? result[0] : result;
    switch (typeof candidate) {
      case "string":
        return candidate;
      case "object":
        // typeof null is "object", hence the truthiness guard.
        return candidate ? candidate["url"] || candidate["contentUrl"] : void 0;
      default:
        return void 0;
    }
  }
  /**
   * Applies `reader` to the entry the getters read from: the first element of
   * a non-empty array payload, otherwise the payload itself when it is an
   * object. Returns undefined when there is no usable payload.
   */
  #readEntry(reader) {
    const payload = this.jsonData;
    if (!payload) {
      return void 0;
    }
    if (Array.isArray(payload) && payload.length > 0) {
      return reader(payload[0]);
    }
    if (typeof payload === "object") {
      return reader(payload);
    }
    return void 0;
  }
};
// src/index.ts
/**
 * Fetches a URL and builds a link preview (title, description, image, site
 * name, url) from its markup, with optional caching in Redis or in-process.
 */
var LinkPreview = class {
  redis;
  nodeCache;
  cacheMaxAge;
  cacheEnabled;
  requestTimeout;
  maxRedirects;
  httpHeaders = {
    "User-Agent": "WhatsApp/2.23.4.79 A"
    // default option
  };
  /**
   * Creates a new LinkPreview instance.
   *
   * @param {object} [options] Configuration; every field is optional and the
   *   whole argument may be omitted.
   * @param {number} [options.requestTimeout=5000] Passed to axios as `timeout`.
   * @param {number} [options.maxRedirects=5] Passed to axios as `maxRedirects`.
   * @param {number} [options.cacheMaxAge=0] Expiry handed to the cache on every
   *   write (Redis `EX` argument / node-cache TTL). Caching is enabled only
   *   when this is truthy.
   * @param {object} [options.httpHeaders] Extra request headers, merged over
   *   the default `User-Agent`.
   * @param {object} [options.redis] Client exposing `get(key)` and
   *   `set(key, value, "EX", ttl)`. When caching is enabled and no client is
   *   given, an in-process NodeCache is created instead.
   */
  constructor(options = {}) {
    // Tolerate an explicit null as well as an omitted argument.
    const settings = options ?? {};
    this.requestTimeout = settings.requestTimeout ?? 5e3;
    this.cacheMaxAge = settings.cacheMaxAge ?? 0;
    this.cacheEnabled = Boolean(settings.cacheMaxAge);
    this.maxRedirects = settings.maxRedirects ?? 5;
    this.httpHeaders = {
      ...this.httpHeaders,
      ...settings.httpHeaders ?? {}
    };
    if (this.cacheEnabled) {
      this.redis = settings.redis;
      this.nodeCache = settings.redis ? void 0 : new NodeCache();
    }
  }
  /**
   * Fetches preview data for a given URL.
   *
   * A cached entry is returned when present. Otherwise the page is fetched,
   * parsed, and the result is written to the cache (when caching is enabled).
   * Responses whose content type starts with "image/" short-circuit to a
   * preview that points at the URL itself; that result is not cached.
   *
   * @param {string} url The URL to fetch preview data for.
   * @returns {Promise<object|null>} The preview data, or null when `url` fails
   *   validation. Network and parsing errors propagate to the caller.
   */
  async getLinkPreview(url) {
    if (!isValidUrl(url)) {
      return null;
    }
    const cacheKey = `link-preview-node:${url}`;
    if (this.cacheEnabled) {
      if (this.redis) {
        const cachedData = await this.redis.get(cacheKey);
        if (cachedData) {
          return JSON.parse(cachedData);
        }
      } else {
        const cachedData = this.nodeCache?.get(cacheKey);
        if (cachedData) {
          return cachedData;
        }
      }
    }
    const videoId = getVideoIdFromYoutubeUrl(url);
    const response = videoId ? await this.getYoutubeData(videoId) : await this.fetchWithRedirects(url);
    const contentType = response.headers["content-type"];
    if (contentType?.startsWith("image/")) {
      return {
        title: "",
        desc: "",
        siteName: "",
        image: url,
        url
      };
    }
    const document = this.responseToDocument(response.data);
    if (!document) {
      // Defensive guard: responseToDocument currently throws instead of returning a falsy value.
      return null;
    }
    const metadata = this.extractMetadata(document, url);
    if (this.cacheEnabled) {
      if (this.redis) {
        await this.redis.set(cacheKey, JSON.stringify(metadata), "EX", this.cacheMaxAge);
      } else {
        this.nodeCache?.set(cacheKey, metadata, this.cacheMaxAge);
      }
    }
    return metadata;
  }
  /**
   * Extracts metadata from a given document.
   *
   * Parsers are tried in priority order (Open Graph, Twitter, YouTube,
   * JSON-LD, plain HTML, generic meta tags). Each field keeps the first
   * non-empty value found, and the search stops as soon as title,
   * description, image and url are all filled. Parsers are run lazily, so
   * later ones are skipped once the result is complete.
   *
   * A relative image path is finally resolved against the chosen url; if
   * that resolution fails the image is dropped.
   *
   * @param document The parsed document (value returned by `cheerio.load`).
   * @param {string} url The URL of the document; used when no parser supplies one.
   * @returns {{title: *, desc: *, image: *, siteName: *, url: *}} The extracted metadata.
   */
  extractMetadata(document, url) {
    const output = {};
    const parserRuns = [
      () => new OpenGraphParser(document).parse(),
      () => new TwitterParser(document).parse(),
      () => new YoutubeParser(document).parse(),
      () => new JsonLdParser(document).parse(),
      () => new HtmlParser(document).parse(),
      () => new OtherParser(document).parse()
    ];
    // Keep a filled value; otherwise take the candidate. An empty string (for
    // example `og:title content=""`) must not block later parsers, because
    // hasAllMetadata treats empty strings as missing.
    const preferFilled = (current, candidate) => current || (candidate ?? current);
    for (const runParser of parserRuns) {
      const p = runParser();
      if (!p) continue;
      output.title = preferFilled(output.title, p.title);
      output.desc = preferFilled(output.desc, p.desc);
      output.image = preferFilled(output.image, p.image);
      output.siteName = preferFilled(output.siteName, p.siteName);
      output.url = output.url || p.url || url;
      if (hasAllMetadata(output)) break;
    }
    if (output.url && output.image) {
      try {
        const baseUrl = new URL(output.url);
        output.image = new URL(output.image, baseUrl).toString();
      } catch (error) {
        output.image = void 0;
      }
    }
    return output;
  }
  /**
   * Converts a response body to a cheerio document.
   * @param html The response body to load.
   * @returns The value returned by `cheerio.load`.
   * @throws Any error raised by `cheerio.load` propagates unchanged.
   */
  responseToDocument(html) {
    return cheerio.load(html);
  }
  /**
   * Performs a GET request for `url` using the configured headers, redirect
   * limit and timeout.
   * @param {string} url The URL to fetch data from.
   * @returns {Promise<object>} The axios response.
   * @throws Any error raised by axios propagates unchanged.
   */
  async fetchWithRedirects(url) {
    return await axios.get(url, {
      headers: { ...this.httpHeaders },
      maxRedirects: this.maxRedirects,
      timeout: this.requestTimeout
    });
  }
  /**
   * Fetches the watch page for a YouTube video id.
   *
   * NOTE(review): YoutubeParser reads JSON fields such as `thumbnail_url` and
   * `provider_name`, whereas this requests the regular watch page — confirm
   * which endpoint was intended.
   *
   * @param {string} videoId The YouTube video ID to fetch data from.
   * @returns {Promise<object>} The axios response.
   */
  async getYoutubeData(videoId) {
    const url = `https://www.youtube.com/watch?v=${videoId}`;
    return await this.fetchWithRedirects(url);
  }
};
// Expose the LinkPreview class as this module's default export.
var index_default = LinkPreview;
export {
index_default as default
};