UNPKG

rn-url-preview

Version:
272 lines (271 loc) 11.6 kB
"use strict"; Object.defineProperty(exports, "__esModule", { value: true }); exports.getPreviewDataHelper = exports.getPreviewDataImageHelper = exports.getContentHelper = exports.getHtmlEntitiesDecodedTextHelper = exports.getActualImageUrlHelper = exports.renderSpecialElementHelper = exports.REGEX_TITLE = exports.REGEX_META = exports.REGEX_LINK = exports.REGEX_IMAGE_TAG = exports.REGEX_IMAGE_CONTENT_TYPE = exports.REGEX_EMAIL = void 0; const react_1 = require("react"); const html_entities_1 = require("html-entities"); /** * Regular expression to match email addresses */ exports.REGEX_EMAIL = /([a-zA-Z0-9+._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)/g; /** * Regular expression to match image content types */ exports.REGEX_IMAGE_CONTENT_TYPE = /image\/*/g; /** * Regular expression to extract image source from img tags * Considers empty line after img tag and takes only the src field * Space before src to avoid matching data-src attributes */ exports.REGEX_IMAGE_TAG = /<img[\n\r]*.*? src=["'](.*?)["']/g; /** * Regular expression to match URLs (http, https, ftp protocols) */ exports.REGEX_LINK = /((http|ftp|https):\/\/)?([\w_-]+(?:(?:\.[\w_-]+)+))([\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])?/i; /** * Regular expression to extract meta tags * Handles both property and name attributes with content * Supports both single and double quotes */ exports.REGEX_META = /<meta.*?(property|name)=["'](.*?)["'].*?content=["'](.*?)["'].*?>/g; /** * Regular expression to extract page title */ exports.REGEX_TITLE = /<title.*?>(.*?)<\/title>/g; /** * Recursively processes and renders special elements based on given filters and properties. * * @param {React.ReactNode} children - The children elements to be rendered. * @param {Object} props - Additional properties to pass to the children elements. * @returns {React.ReactNode[]} - The filtered and modified children elements. */ function renderSpecialElementHelper({ children, props = {}, }) { const renderOnly = Object.keys(props); // Recursive function to process each child const processChild = (child) => { // If child is a valid element with children, recursively process them if ((0, react_1.isValidElement)(child) && // @ts-expect-error react_1.Children.toArray(child.props.children).length > 0) { return (0, react_1.cloneElement)(child, // @ts-expect-error child === null || child === void 0 ? void 0 : child.props, react_1.Children.toArray(child.props .children).map(processChild)); } // If child is a valid React element if ((0, react_1.isValidElement)(child)) { // Redundant check - can be removed if (!(0, react_1.isValidElement)(child)) { return child; } // Get the display name of the child element type const displayName = child.type.displayName; // Check if the element should be included if (renderOnly && !renderOnly.includes(displayName)) { return null; } // Apply props to the child element return (0, react_1.cloneElement)(child, props === null || props === void 0 ? void 0 : props[displayName]); } // Return the original child if it is not a valid React element return child; }; // Convert children to an array and process each child return react_1.Children.toArray(children).map(processChild); } exports.renderSpecialElementHelper = renderSpecialElementHelper; /** * Resolves relative image URLs to absolute URLs * * @param {string} baseUrl - The base URL of the page * @param {string} imageUrl - The image URL to resolve * @returns {string|undefined} - The resolved absolute image URL or undefined */ const getActualImageUrlHelper = (baseUrl, imageUrl) => { let actualImageUrl = imageUrl === null || imageUrl === void 0 ? void 0 : imageUrl.trim(); if (!actualImageUrl || actualImageUrl.startsWith("data")) return; // Handle protocol-relative URLs if (actualImageUrl.startsWith("//")) actualImageUrl = `https:${actualImageUrl}`; // Handle relative URLs if (!actualImageUrl.startsWith("http")) { if (baseUrl.endsWith("/") && actualImageUrl.startsWith("/")) { actualImageUrl = `${baseUrl.slice(0, -1)}${actualImageUrl}`; } else if (!baseUrl.endsWith("/") && !actualImageUrl.startsWith("/")) { actualImageUrl = `${baseUrl}/${actualImageUrl}`; } else { actualImageUrl = `${baseUrl}${actualImageUrl}`; } } return actualImageUrl; }; exports.getActualImageUrlHelper = getActualImageUrlHelper; /** * Decodes HTML entities in text * * @param {string} text - The text to decode * @returns {string|undefined} - The decoded text or undefined */ const getHtmlEntitiesDecodedTextHelper = (text) => { const actualText = text === null || text === void 0 ? void 0 : text.trim(); if (!actualText) return; return (0, html_entities_1.decode)(actualText); }; exports.getHtmlEntitiesDecodedTextHelper = getHtmlEntitiesDecodedTextHelper; /** * Helper to extract content from meta tags based on property/name type * * @param {string} left - First string (could be property/name or content) * @param {string} right - Second string (could be property/name or content) * @param {string} type - The type to look for (e.g., "og:title", "description") * @returns {string|undefined} - The extracted content or undefined */ const getContentHelper = (left, right, type) => { var _a; const contents = { [left.trim()]: right, [right.trim()]: left, }; return (_a = contents[type]) === null || _a === void 0 ? void 0 : _a.trim(); }; exports.getContentHelper = getContentHelper; /** * Helper to process image URLs for preview data * * @param {string} url - The image URL to process * @returns {Promise<string|undefined>} - The processed image URL or undefined */ /* istanbul ignore next */ const getPreviewDataImageHelper = async (url) => { if (!url) return; return url; }; exports.getPreviewDataImageHelper = getPreviewDataImageHelper; /** * Extracts preview data (title, description, image) from a URL * * @param {string} text - The text containing a URL * @param {number} requestTimeout - Timeout for the fetch request in milliseconds * @returns {Promise<PreviewData>} - The extracted preview data */ /* istanbul ignore next */ const getPreviewDataHelper = async (text, requestTimeout = 5000) => { var _a, _b; // Initialize empty preview data const previewData = { description: undefined, image: undefined, link: undefined, title: undefined, }; try { // Remove email addresses from text const textWithoutEmails = text.replace(exports.REGEX_EMAIL, "").trim(); if (!textWithoutEmails) return previewData; // Extract URL from text const link = (_a = textWithoutEmails.match(exports.REGEX_LINK)) === null || _a === void 0 ? void 0 : _a[0]; if (!link) return previewData; // Ensure URL has protocol let url = link; if (!url.toLowerCase().startsWith("http")) { url = "https://" + url; } // Set up fetch with timeout let abortControllerTimeout; const abortController = new AbortController(); const request = fetch(url, { headers: { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36", }, signal: abortController.signal, }); abortControllerTimeout = setTimeout(() => { abortController.abort(); }, requestTimeout); const response = await request; clearTimeout(abortControllerTimeout); previewData.link = url; // Check if the URL points to an image const contentType = (_b = response.headers.get("content-type")) !== null && _b !== void 0 ? _b : ""; if (exports.REGEX_IMAGE_CONTENT_TYPE.test(contentType)) { const image = await (0, exports.getPreviewDataImageHelper)(url); previewData.image = image; return previewData; } // Process HTML content const html = await response.text(); // Some pages return undefined if (!html) return previewData; // Extract head section const head = html.substring(0, html.indexOf("<body")); // Get page title const title = exports.REGEX_TITLE.exec(head); previewData.title = (0, exports.getHtmlEntitiesDecodedTextHelper)(title === null || title === void 0 ? void 0 : title[1]); // Extract meta tags let matches; const meta = []; while ((matches = exports.REGEX_META.exec(head)) !== null) { meta.push(matches); } // Process meta tags to extract preview data const metaPreviewData = meta.reduce((acc, curr) => { // Verify that we have property/name and content // Note that if a page will specify property, name and content in the same meta, regex will fail if (!curr[2] || !curr[3]) return acc; // Extract description, image, and title from meta tags const description = !acc.description && ((0, exports.getContentHelper)(curr[2], curr[3], "og:description") || (0, exports.getContentHelper)(curr[2], curr[3], "description")); const ogImage = !acc.imageUrl && (0, exports.getContentHelper)(curr[2], curr[3], "og:image"); const ogTitle = !acc.title && (0, exports.getContentHelper)(curr[2], curr[3], "og:title"); return { description: description ? (0, exports.getHtmlEntitiesDecodedTextHelper)(description) : acc.description, imageUrl: ogImage ? (0, exports.getActualImageUrlHelper)(url, ogImage) : acc.imageUrl, title: ogTitle ? (0, exports.getHtmlEntitiesDecodedTextHelper)(ogTitle) : acc.title, }; }, { title: previewData.title }); // Update preview data with meta information previewData.description = metaPreviewData.description; previewData.image = await (0, exports.getPreviewDataImageHelper)(metaPreviewData.imageUrl); previewData.title = metaPreviewData.title; // If no image found in meta tags, look for images in the HTML if (!previewData.image) { let imageMatches; const tags = []; while ((imageMatches = exports.REGEX_IMAGE_TAG.exec(html)) !== null) { tags.push(imageMatches); } let images = []; // Process up to 5 image tags, skipping data URLs for (const tag of tags .filter((t) => !t[1].startsWith("data")) .slice(0, 5)) { const image = (0, exports.getActualImageUrlHelper)(url, tag[1]); if (!image) continue; images = [...images, image]; } previewData.image = images[0]; } return previewData; } catch (_c) { // Return empty preview data on error return previewData; } }; exports.getPreviewDataHelper = getPreviewDataHelper;