UNPKG

link-preview-js

Version:

JavaScript module to extract and fetch HTTP link information from blocks of text.

273 lines (272 loc) 16.9 kB
"use strict";
// Jest spec for link-preview-js: exercises CONSTANTS.REGEX_LOOPBACK,
// getLinkPreview() and getPreviewFromContent().
//
// NOTE(review): this looks like TypeScript compiler output (the __awaiter /
// __importDefault helpers below are the ones tsc emits) — lasting changes
// presumably belong in the TypeScript source; confirm before editing here.
//
// Most getLinkPreview() tests perform real network requests against third-party
// sites, so they need connectivity and can break when those sites change.

// Helper: drives a generator function as if it were an async function and
// returns a promise for its final value. Each `yield`ed value is awaited.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
    return new (P || (P = Promise))(function (resolve, reject) {
        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
};
// Helper: wraps a CommonJS export in `{ default: ... }` unless the module is
// already flagged as an ES module.
var __importDefault = (this && this.__importDefault) || function (mod) {
    return (mod && mod.__esModule) ? mod : { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const index_1 = require("../index");
const constants_1 = require("../constants");
const sampleResponse_json_1 = __importDefault(require("./sampleResponse.json"));

describe(`#REGEX_LOOPBACK`, () => {
    it(`matches IPv6 loopback and local ranges`, () => {
        // Loopback, IPv4-mapped loopback, unique-local (fc00::/7) and
        // link-local (fe80::/10) sample addresses.
        expect(constants_1.CONSTANTS.REGEX_LOOPBACK.test(`::1`)).toBe(true);
        expect(constants_1.CONSTANTS.REGEX_LOOPBACK.test(`::ffff:127.0.0.1`)).toBe(true);
        expect(constants_1.CONSTANTS.REGEX_LOOPBACK.test(`fc00::1`)).toBe(true);
        expect(constants_1.CONSTANTS.REGEX_LOOPBACK.test(`fd12:3456:789a::1`)).toBe(true);
        expect(constants_1.CONSTANTS.REGEX_LOOPBACK.test(`fe80::abcd`)).toBe(true);
        expect(constants_1.CONSTANTS.REGEX_LOOPBACK.test(`febf::abcd`)).toBe(true);
    });
    it(`does not match non-local IPv6 addresses`, () => {
        expect(constants_1.CONSTANTS.REGEX_LOOPBACK.test(`2001:4860:4860::8888`)).toBe(false);
    });
});

describe(`#getLinkPreview()`, () => {
    it(`should extract link info from just URL`, () => __awaiter(void 0, void 0, void 0, function* () {
        var _a;
        const linkInfo = yield (0, index_1.getLinkPreview)(`https://www.youtube.com/watch?v=wuClZjOdT30`, {
            headers: { "Accept-Language": `en-US` },
        });
        expect(linkInfo.url).toEqual(`https://www.youtube.com/watch?v=wuClZjOdT30`);
        expect(linkInfo.siteName).toEqual(`YouTube`);
        expect(linkInfo.title).toEqual(`Geography Now! Germany`);
        expect(linkInfo.description).toBeTruthy();
        expect(linkInfo.mediaType).toEqual(`video.other`);
        expect(linkInfo.images.length).toEqual(1);
        expect(linkInfo.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
        expect(linkInfo.videos.length).toEqual(0);
        expect(linkInfo.favicons[0]).not.toBe(``);
        expect(linkInfo.contentType.toLowerCase()).toEqual(`text/html`);
        expect((_a = linkInfo.charset) === null || _a === void 0 ? void 0 : _a.toLowerCase()).toEqual(`utf-8`);
    }));

    it("returns charset of website", () => __awaiter(void 0, void 0, void 0, function* () {
        var _a;
        const linkInfo = yield (0, index_1.getLinkPreview)(`https://www.pravda.com.ua`);
        expect(linkInfo.url).toEqual(`https://www.pravda.com.ua/`);
        expect(linkInfo.contentType.toLowerCase()).toEqual(`text/html`);
        expect((_a = linkInfo.charset) === null || _a === void 0 ? void 0 : _a.toLowerCase()).toEqual(`utf-8`);
    }));

    // Disabled (xit): not executed by Jest.
    xit("should extract author from news article", () => __awaiter(void 0, void 0, void 0, function* () {
        const linkInfo = yield (0, index_1.getLinkPreview)(`https://www.usatoday.com/story/special/contributor-content/2025/10/15/why-chaos-engineering-is-more-important-than-ever-in-the-ai-era/86712877007/`);
        expect(linkInfo.author).toEqual(`Matt Emma`);
    }));

    it(`should extract link info from a URL with a newline`, () => __awaiter(void 0, void 0, void 0, function* () {
        // The URL is deliberately wrapped in newline characters; explicit
        // escapes keep that visible and immune to whitespace reformatting.
        const linkInfo = yield (0, index_1.getLinkPreview)(`\nhttps://www.youtube.com/watch?v=wuClZjOdT30\n`, {
            headers: { "Accept-Language": `en-US` },
        });
        expect(linkInfo.url).toEqual(`https://www.youtube.com/watch?v=wuClZjOdT30`);
        expect(linkInfo.title).toEqual(`Geography Now! Germany`);
        expect(linkInfo.siteName).toBeTruthy();
        expect(linkInfo.description).toBeTruthy();
        expect(linkInfo.mediaType).toEqual(`video.other`);
        expect(linkInfo.images.length).toEqual(1);
        expect(linkInfo.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
        expect(linkInfo.videos.length).toEqual(0);
        expect(linkInfo.favicons[0]).not.toBe(``);
        expect(linkInfo.contentType.toLowerCase()).toEqual(`text/html`);
    }));

    it(`should extract link info from just text with a URL`, () => __awaiter(void 0, void 0, void 0, function* () {
        const linkInfo = yield (0, index_1.getLinkPreview)(`This is some text blah blah https://www.youtube.com/watch?v=wuClZjOdT30 and more text`, {
            headers: { "Accept-Language": `en-US` },
        });
        expect(linkInfo.url).toEqual(`https://www.youtube.com/watch?v=wuClZjOdT30`);
        expect(linkInfo.title).toEqual(`Geography Now! Germany`);
        expect(linkInfo.siteName).toEqual(`YouTube`);
        expect(linkInfo.description).toBeTruthy();
        expect(linkInfo.mediaType).toEqual(`video.other`);
        expect(linkInfo.images.length).toEqual(1);
        expect(linkInfo.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
        expect(linkInfo.videos.length).toEqual(0);
        expect(linkInfo.favicons[0]).toBeTruthy();
        expect(linkInfo.contentType.toLowerCase()).toEqual(`text/html`);
    }));

    // it(`should make request with different languages`, async () => {
    //   let linkInfo: any = await getLinkPreview(`https://www.wikipedia.org/`, {
    //     headers: { "Accept-Language": `es` },
    //     followRedirects: `follow`,
    //   });
    //   expect(linkInfo.title).toContain(`Wikipedia, la enciclopedia libre`);
    //   linkInfo = await getLinkPreview(`https://www.wikipedia.org/`);
    //   expect(linkInfo.title).toContain(`Wikipedia`);
    // });

    it(`should handle audio urls`, () => __awaiter(void 0, void 0, void 0, function* () {
        var _a;
        const linkInfo = yield (0, index_1.getLinkPreview)(`https://ondemand.npr.org/anon.npr-mp3/npr/atc/2007/12/20071231_atc_13.mp3`);
        expect(linkInfo.url).toEqual(`https://ondemand.npr.org/anon.npr-mp3/npr/atc/2007/12/20071231_atc_13.mp3`);
        expect(linkInfo.mediaType).toEqual(`audio`);
        expect((_a = linkInfo.contentType) === null || _a === void 0 ? void 0 : _a.toLowerCase()).toEqual(`audio/mpeg`);
        expect(linkInfo.favicons[0]).toBeTruthy();
    }));

    it(`should handle video urls`, () => __awaiter(void 0, void 0, void 0, function* () {
        var _a;
        const linkInfo = yield (0, index_1.getLinkPreview)(`https://www.w3schools.com/html/mov_bbb.mp4`);
        expect(linkInfo.url).toEqual(`https://www.w3schools.com/html/mov_bbb.mp4`);
        expect(linkInfo.mediaType).toEqual(`video`);
        expect((_a = linkInfo.contentType) === null || _a === void 0 ? void 0 : _a.toLowerCase()).toEqual(`video/mp4`);
        expect(linkInfo.favicons[0]).toBeTruthy();
    }));

    it(`should handle image urls`, () => __awaiter(void 0, void 0, void 0, function* () {
        var _a;
        const linkInfo = yield (0, index_1.getLinkPreview)(`https://media.npr.org/assets/img/2018/04/27/gettyimages-656523922nunes-4bb9a194ab2986834622983bb2f8fe57728a9e5f-s1100-c15.jpg`);
        expect(linkInfo.url).toEqual(`https://media.npr.org/assets/img/2018/04/27/gettyimages-656523922nunes-4bb9a194ab2986834622983bb2f8fe57728a9e5f-s1100-c15.jpg`);
        expect(linkInfo.mediaType).toEqual(`image`);
        expect((_a = linkInfo.contentType) === null || _a === void 0 ? void 0 : _a.toLowerCase()).toEqual(`image/jpeg`);
        expect(linkInfo.favicons[0]).toBeTruthy();
    }));

    it(`should handle unknown content type urls`, () => __awaiter(void 0, void 0, void 0, function* () {
        const linkInfo = yield (0, index_1.getLinkPreview)(`https://mjml.io/try-it-live`);
        expect(linkInfo.url).toEqual(`https://mjml.io/try-it-live`);
        expect(linkInfo.mediaType).toEqual(`website`);
    }));

    // This site changed? it is not returning application any more but rather website
    // it.skip(`should handle application urls`, async () => {
    //   const linkInfo = await getLinkPreview(
    //     `https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`
    //   );
    //   expect(linkInfo.url).toEqual(
    //     `https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`
    //   );
    //   expect(linkInfo.mediaType).toEqual(`application`);
    //   expect(linkInfo.contentType?.toLowerCase()).toEqual(`application/pdf`);
    //   expect(linkInfo.favicons[0]).toBeTruthy();
    // });

    // The rejection tests below compare the error against a stored snapshot.
    // Jest keys snapshots by test title, so these titles must not be altered.
    it(`no link in text should fail gracefully`, () => __awaiter(void 0, void 0, void 0, function* () {
        yield expect((0, index_1.getLinkPreview)(`no link`)).rejects.toThrowErrorMatchingSnapshot();
    }));

    it(`should handle malformed urls gracefully`, () => __awaiter(void 0, void 0, void 0, function* () {
        yield expect((0, index_1.getLinkPreview)(`this is a malformed link: ahttps://www.youtube.com/watch?v=wuClZjOdT30`)).rejects.toThrowErrorMatchingSnapshot();
    }));

    it(`should block .internal hostnames`, () => __awaiter(void 0, void 0, void 0, function* () {
        yield expect((0, index_1.getLinkPreview)(`http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token`)).rejects.toThrowErrorMatchingSnapshot();
    }));

    it(`should block .local hostnames`, () => __awaiter(void 0, void 0, void 0, function* () {
        yield expect((0, index_1.getLinkPreview)(`http://printer.local/status`)).rejects.toThrowErrorMatchingSnapshot();
    }));

    it(`should block nip.io wildcard hostnames`, () => __awaiter(void 0, void 0, void 0, function* () {
        yield expect((0, index_1.getLinkPreview)(`http://169.254.169.254.nip.io/latest/meta-data/iam/security-credentials/`)).rejects.toThrowErrorMatchingSnapshot();
    }));

    it(`should block sslip.io wildcard hostnames`, () => __awaiter(void 0, void 0, void 0, function* () {
        yield expect((0, index_1.getLinkPreview)(`http://127.0.0.1.sslip.io/`)).rejects.toThrowErrorMatchingSnapshot();
    }));

    it(`should handle empty strings gracefully`, () => __awaiter(void 0, void 0, void 0, function* () {
        yield expect((0, index_1.getLinkPreview)(``)).rejects.toThrowErrorMatchingSnapshot();
    }));

    it.skip(`should handle a proxy url option`, () => __awaiter(void 0, void 0, void 0, function* () {
        // origin header is required by cors-anywhere
        const linkInfo = yield (0, index_1.getLinkPreview)(`https://www.youtube.com/watch?v=wuClZjOdT30`, {
            proxyUrl: `https://cors-anywhere.herokuapp.com/`,
            headers: {
                Origin: `http://localhost:8000`,
                "Accept-Language": `en-US`,
            },
        });
        expect(linkInfo.url).toEqual(`https://www.youtube.com/watch?v=wuClZjOdT30`);
        expect(linkInfo.siteName).toEqual(`YouTube`);
        expect(linkInfo.title).toEqual(`Geography Now! Germany`);
        expect(linkInfo.description).toBeTruthy();
        expect(linkInfo.mediaType).toEqual(`video.other`);
        expect(linkInfo.images.length).toEqual(1);
        expect(linkInfo.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
        expect(linkInfo.videos.length).toEqual(0);
        expect(linkInfo.favicons[0]).not.toBe(``);
        expect(linkInfo.contentType.toLowerCase()).toEqual(`text/html`);
    }));

    // NOTE(review): the two timeout tests only assert when the request rejects.
    // If the remote site answers in time they pass without checking anything.
    // Left best-effort because the outcome depends on a third-party server —
    // consider a local never-responding endpoint plus expect.assertions(1).
    it("should timeout (default 3s) with infinite loading link", () => __awaiter(void 0, void 0, void 0, function* () {
        try {
            yield (0, index_1.getLinkPreview)(`https://www.gamestop.com/video-games/pc-gaming/components/cooling/products/hyper-212-rgb-black-edition-fan/185243.html?gclid=Cj0KCQjwraqHBhDsARIsAKuGZeECDlqkF2cxpcuS0xRxQmrv5BxFawWS_B51kiqehPf64_KlO0oyunsaAhn5EALw_wcB&gclsrc=aw.ds`);
        }
        catch (e) {
            expect(e.message).toEqual("Request timeout");
        }
    }));

    it("should timeout (custom 1s) with infinite loading link", () => __awaiter(void 0, void 0, void 0, function* () {
        try {
            yield (0, index_1.getLinkPreview)(`https://www.gamestop.com/video-games/pc-gaming/components/cooling/products/hyper-212-rgb-black-edition-fan/185243.html?gclid=Cj0KCQjwraqHBhDsARIsAKuGZeECDlqkF2cxpcuS0xRxQmrv5BxFawWS_B51kiqehPf64_KlO0oyunsaAhn5EALw_wcB&gclsrc=aw.ds`, {
                timeout: 1000,
            });
        }
        catch (e) {
            expect(e.message).toEqual("Request timeout");
        }
    }));

    it(`should handle followRedirects option is error`, () => __awaiter(void 0, void 0, void 0, function* () {
        // Require the catch-branch assertion to run: without this the test
        // would pass silently if getLinkPreview resolved instead of rejecting.
        expect.assertions(1);
        try {
            yield (0, index_1.getLinkPreview)(`http://google.com/`, { followRedirects: `error` });
        }
        catch (e) {
            expect(e.message).toContain(`UnexpectedRedirect`);
        }
    }));

    it(`should handle followRedirects option is manual but handleRedirects was not provided`, () => __awaiter(void 0, void 0, void 0, function* () {
        // Same guard as above: a resolved promise must fail the test.
        expect.assertions(1);
        try {
            yield (0, index_1.getLinkPreview)(`http://google.com/`, { followRedirects: `manual` });
        }
        catch (e) {
            expect(e.message).toEqual(`link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`);
        }
    }));

    it(`should handle followRedirects option is manual with handleRedirects function`, () => __awaiter(void 0, void 0, void 0, function* () {
        const response = yield (0, index_1.getLinkPreview)(`http://google.com/`, {
            followRedirects: `manual`,
            // Approve only the expected redirect target.
            handleRedirects: (baseURL, forwardedURL) => forwardedURL === `http://www.google.com/`,
        });
        expect(response.contentType).toEqual(`text/html`);
        expect(response.url).toEqual(`http://www.google.com/`);
        expect(response.mediaType).toEqual(`website`);
    }));

    it("should handle override response body using onResponse option", () => __awaiter(void 0, void 0, void 0, function* () {
        // Captured inside onResponse so the assertion below can compare against
        // exactly what the callback wrote into the result.
        let firstParagraphText;
        const res = yield (0, index_1.getLinkPreview)(`https://www.example.com/`, {
            onResponse: (result, doc) => {
                firstParagraphText = doc("p")
                    .first()
                    .text()
                    .split("\n")
                    .map((x) => x.trim())
                    .join(" ");
                result.siteName = `SiteName has been overridden`;
                result.description = firstParagraphText;
                return result;
            },
        });
        expect(res.siteName).toEqual("SiteName has been overridden");
        expect(res.description).toEqual(firstParagraphText);
    }));

    it("should handle video tags without type or secure_url tags", () => __awaiter(void 0, void 0, void 0, function* () {
        const res = yield (0, index_1.getLinkPreview)(`https://newpathtitle.com/falling-markets-how-to-stop-buyer-from-getting-out/`, { followRedirects: `follow` });
        expect(res.siteName).toEqual(`New Path Title`);
        expect(res.title).toEqual(`Falling Markets: How To Stop A Buyer From Getting Out | New Path Title`);
        expect(res.description).toBeTruthy();
        expect(res.mediaType).toEqual(`article`);
        expect(res.images.length).toBeGreaterThan(0);
        expect(res.videos.length).toBeGreaterThan(0);
        expect(res.videos[0].url).toEqual(`https://www.youtube.com/embed/nqNXjxpAPkU`);
        expect(res.favicons.length).toBeGreaterThan(0);
        expect(res.contentType.toLowerCase()).toEqual(`text/html`);
    }));
});

describe(`#getPreviewFromContent`, () => {
    // Runs against the ./sampleResponse.json fixture rather than a live request.
    it(`Basic parsing`, () => __awaiter(void 0, void 0, void 0, function* () {
        const linkInfo = yield (0, index_1.getPreviewFromContent)(sampleResponse_json_1.default);
        expect(linkInfo.url).toEqual(`https://www.youtube.com/watch?v=wuClZjOdT30`);
        expect(linkInfo.siteName).toEqual(`YouTube`);
        expect(linkInfo.title).toEqual(`Geography Now! Germany`);
        expect(linkInfo.description).toBeTruthy();
        expect(linkInfo.mediaType).toEqual(`video.other`);
        expect(linkInfo.images.length).toEqual(1);
        expect(linkInfo.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
        expect(linkInfo.videos.length).toEqual(0);
        expect(linkInfo.favicons[0]).not.toBe(``);
        expect(linkInfo.contentType.toLowerCase()).toEqual(`text/html`);
    }));
});