link-preview-js
Version:
JavaScript module to extract and fetch HTTP link information from blocks of text.
273 lines (272 loc) • 16.9 kB
JavaScript
;
/**
 * Drives a generator function as an asynchronous task and exposes the outcome as a promise.
 * NOTE(review): this looks like the standard `__awaiter` helper emitted by the TypeScript
 * compiler for down-levelled async functions — confirm against the build configuration.
 *
 * An existing `this.__awaiter` is reused when present; otherwise the fallback below is used.
 *
 * @param thisArg    `this` value the generator function is applied with.
 * @param _arguments Arguments applied to the generator function; an empty list is used when falsy.
 * @param P          Promise constructor to use; the global `Promise` is used when falsy.
 * @param generator  Generator function whose yielded values are waited on one at a time.
 * @returns A `P` instance that resolves with the generator's final value, or rejects with
 *          any error thrown while advancing the generator.
 */
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Pass through values that are already instances of P; wrap anything else in a resolved P.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with a resolved value; an exception thrown by the generator rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw a rejection reason into the generator so its own try/catch can handle it.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Finish when the generator is done; otherwise wait for the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (replacing the `generator` binding with the iterator) and take the first step.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
/**
 * Normalises the result of `require` so that callers can always read `.default`.
 * An existing `this.__importDefault` is reused when present.
 *
 * @param mod Value returned by `require`.
 * @returns `mod` itself when it is truthy and flagged with `__esModule`; otherwise a new
 *          object whose `default` property is `mod`.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
Object.defineProperty(exports, "__esModule", { value: true });
const index_1 = require("../index");
const constants_1 = require("../constants");
const sampleResponse_json_1 = __importDefault(require("./sampleResponse.json"));
// Checks CONSTANTS.REGEX_LOOPBACK against sample IPv6 addresses: six that must match
// and one public address that must not.
describe(`#REGEX_LOOPBACK`, () => {
    // Looks the regex up on every call, exactly as a direct
    // `CONSTANTS.REGEX_LOOPBACK.test(...)` expression would.
    const matchesLoopback = (address) => constants_1.CONSTANTS.REGEX_LOOPBACK.test(address);

    it(`matches IPv6 loopback and local ranges`, () => {
        const localAddresses = [
            `::1`,
            `::ffff:127.0.0.1`,
            `fc00::1`,
            `fd12:3456:789a::1`,
            `fe80::abcd`,
            `febf::abcd`,
        ];
        for (const address of localAddresses) {
            expect(matchesLoopback(address)).toBe(true);
        }
    });

    it(`does not match non-local IPv6 addresses`, () => {
        expect(matchesLoopback(`2001:4860:4860::8888`)).toBe(false);
    });
});
describe(`#getLinkPreview()`, () => {
// Previews a bare YouTube watch URL (sent with an en-US Accept-Language header) and
// checks each field of the returned preview, including the charset.
it(`should extract link info from just URL`, () => __awaiter(void 0, void 0, void 0, function* () {
    const videoUrl = `https://www.youtube.com/watch?v=wuClZjOdT30`;
    const requestOptions = {
        headers: { "Accept-Language": `en-US` },
    };
    const preview = yield (0, index_1.getLinkPreview)(videoUrl, requestOptions);
    expect(preview.url).toEqual(videoUrl);
    expect(preview.siteName).toEqual(`YouTube`);
    expect(preview.title).toEqual(`Geography Now! Germany`);
    expect(preview.description).toBeTruthy();
    expect(preview.mediaType).toEqual(`video.other`);
    expect(preview.images.length).toEqual(1);
    expect(preview.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
    expect(preview.videos.length).toEqual(0);
    expect(preview.favicons[0]).not.toBe(``);
    expect(preview.contentType.toLowerCase()).toEqual(`text/html`);
    // charset may be null/undefined; lower-case it only when present.
    const charset = preview.charset;
    expect(charset === null || charset === void 0 ? void 0 : charset.toLowerCase()).toEqual(`utf-8`);
}));
// Previews a site root without a trailing slash and checks the reported URL,
// content type and charset.
it("returns charset of website", () => __awaiter(void 0, void 0, void 0, function* () {
    const preview = yield (0, index_1.getLinkPreview)(`https://www.pravda.com.ua`);
    // The expected URL carries a trailing slash that the input does not have.
    expect(preview.url).toEqual(`https://www.pravda.com.ua/`);
    expect(preview.contentType.toLowerCase()).toEqual(`text/html`);
    // charset may be null/undefined; lower-case it only when present.
    const charset = preview.charset;
    expect(charset === null || charset === void 0 ? void 0 : charset.toLowerCase()).toEqual(`utf-8`);
}));
// Disabled (xit): expects the `author` field of a news-article preview to be populated.
xit("should extract author from news article", () => __awaiter(void 0, void 0, void 0, function* () {
    const articleUrl = `https://www.usatoday.com/story/special/contributor-content/2025/10/15/why-chaos-engineering-is-more-important-than-ever-in-the-ai-era/86712877007/`;
    const preview = yield (0, index_1.getLinkPreview)(articleUrl);
    expect(preview.author).toEqual(`Matt Emma`);
}));
// Passes a URL wrapped in a leading and trailing newline and expects the preview
// to report the bare URL.
it(`should extract link info from a URL with a newline`, () => __awaiter(void 0, void 0, void 0, function* () {
    const videoUrl = `https://www.youtube.com/watch?v=wuClZjOdT30`;
    // Input text is exactly: newline, URL, newline.
    const paddedText = `\nhttps://www.youtube.com/watch?v=wuClZjOdT30\n`;
    const requestOptions = { headers: { "Accept-Language": `en-US` } };
    const preview = yield (0, index_1.getLinkPreview)(paddedText, requestOptions);
    expect(preview.url).toEqual(videoUrl);
    expect(preview.title).toEqual(`Geography Now! Germany`);
    expect(preview.siteName).toBeTruthy();
    expect(preview.description).toBeTruthy();
    expect(preview.mediaType).toEqual(`video.other`);
    expect(preview.images.length).toEqual(1);
    expect(preview.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
    expect(preview.videos.length).toEqual(0);
    expect(preview.favicons[0]).not.toBe(``);
    expect(preview.contentType.toLowerCase()).toEqual(`text/html`);
}));
// Passes a sentence that contains a URL and expects the preview to describe that URL.
it(`should extract link info from just text with a URL`, () => __awaiter(void 0, void 0, void 0, function* () {
    const sentence = `This is some text blah blah https://www.youtube.com/watch?v=wuClZjOdT30 and more text`;
    const requestOptions = { headers: { "Accept-Language": `en-US` } };
    const preview = yield (0, index_1.getLinkPreview)(sentence, requestOptions);
    expect(preview.url).toEqual(`https://www.youtube.com/watch?v=wuClZjOdT30`);
    expect(preview.title).toEqual(`Geography Now! Germany`);
    expect(preview.siteName).toEqual(`YouTube`);
    expect(preview.description).toBeTruthy();
    expect(preview.mediaType).toEqual(`video.other`);
    expect(preview.images.length).toEqual(1);
    expect(preview.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
    expect(preview.videos.length).toEqual(0);
    expect(preview.favicons[0]).toBeTruthy();
    expect(preview.contentType.toLowerCase()).toEqual(`text/html`);
}));
// it(`should make request with different languages`, async () => {
// let linkInfo: any = await getLinkPreview(`https://www.wikipedia.org/`, {
// headers: { "Accept-Language": `es` },
// followRedirects: `follow`,
// });
// expect(linkInfo.title).toContain(`Wikipedia, la enciclopedia libre`);
// linkInfo = await getLinkPreview(`https://www.wikipedia.org/`);
// expect(linkInfo.title).toContain(`Wikipedia`);
// });
// Previews a direct link to an MP3 file and expects an `audio` media type.
it(`should handle audio urls`, () => __awaiter(void 0, void 0, void 0, function* () {
    const audioUrl = `https://ondemand.npr.org/anon.npr-mp3/npr/atc/2007/12/20071231_atc_13.mp3`;
    const preview = yield (0, index_1.getLinkPreview)(audioUrl);
    expect(preview.url).toEqual(audioUrl);
    expect(preview.mediaType).toEqual(`audio`);
    // contentType may be null/undefined; lower-case it only when present.
    const contentType = preview.contentType;
    expect(contentType === null || contentType === void 0 ? void 0 : contentType.toLowerCase()).toEqual(`audio/mpeg`);
    expect(preview.favicons[0]).toBeTruthy();
}));
// Previews a direct link to an MP4 file and expects a `video` media type.
it(`should handle video urls`, () => __awaiter(void 0, void 0, void 0, function* () {
    const videoFileUrl = `https://www.w3schools.com/html/mov_bbb.mp4`;
    const preview = yield (0, index_1.getLinkPreview)(videoFileUrl);
    expect(preview.url).toEqual(videoFileUrl);
    expect(preview.mediaType).toEqual(`video`);
    // contentType may be null/undefined; lower-case it only when present.
    const contentType = preview.contentType;
    expect(contentType === null || contentType === void 0 ? void 0 : contentType.toLowerCase()).toEqual(`video/mp4`);
    expect(preview.favicons[0]).toBeTruthy();
}));
// Previews a direct link to a JPEG file and expects an `image` media type.
it(`should handle image urls`, () => __awaiter(void 0, void 0, void 0, function* () {
    const imageUrl = `https://media.npr.org/assets/img/2018/04/27/gettyimages-656523922nunes-4bb9a194ab2986834622983bb2f8fe57728a9e5f-s1100-c15.jpg`;
    const preview = yield (0, index_1.getLinkPreview)(imageUrl);
    expect(preview.url).toEqual(imageUrl);
    expect(preview.mediaType).toEqual(`image`);
    // contentType may be null/undefined; lower-case it only when present.
    const contentType = preview.contentType;
    expect(contentType === null || contentType === void 0 ? void 0 : contentType.toLowerCase()).toEqual(`image/jpeg`);
    expect(preview.favicons[0]).toBeTruthy();
}));
// Expects a page with no recognised media type to be reported as `website`.
it(`should handle unknown content type urls`, () => __awaiter(void 0, void 0, void 0, function* () {
    const pageUrl = `https://mjml.io/try-it-live`;
    const preview = yield (0, index_1.getLinkPreview)(pageUrl);
    expect(preview.url).toEqual(pageUrl);
    expect(preview.mediaType).toEqual(`website`);
}));
// Did this site change? It no longer returns `application` as the media type, but `website` instead.
// it.skip(`should handle application urls`, async () => {
// const linkInfo = await getLinkPreview(
// `https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`
// );
// expect(linkInfo.url).toEqual(
// `https://assets.curtmfg.com/masterlibrary/56282/installsheet/CME_56282_INS.pdf`
// );
// expect(linkInfo.mediaType).toEqual(`application`);
// expect(linkInfo.contentType?.toLowerCase()).toEqual(`application/pdf`);
// expect(linkInfo.favicons[0]).toBeTruthy();
// });
// Text without any URL must reject; the error is pinned by a Jest snapshot.
it(`no link in text should fail gracefully`, () => __awaiter(void 0, void 0, void 0, function* () {
    const pendingPreview = (0, index_1.getLinkPreview)(`no link`);
    yield expect(pendingPreview).rejects.toThrowErrorMatchingSnapshot();
}));
// A URL with a stray character glued to its scheme ("ahttps://") must reject;
// the error is pinned by a Jest snapshot.
it(`should handle malformed urls gracefully`, () => __awaiter(void 0, void 0, void 0, function* () {
    const malformedText = `this is a malformed link: ahttps://www.youtube.com/watch?v=wuClZjOdT30`;
    const pendingPreview = (0, index_1.getLinkPreview)(malformedText);
    yield expect(pendingPreview).rejects.toThrowErrorMatchingSnapshot();
}));
// A hostname ending in `.internal` must be rejected; the error is pinned by a Jest snapshot.
it(`should block .internal hostnames`, () => __awaiter(void 0, void 0, void 0, function* () {
    const internalUrl = `http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token`;
    const pendingPreview = (0, index_1.getLinkPreview)(internalUrl);
    yield expect(pendingPreview).rejects.toThrowErrorMatchingSnapshot();
}));
// A hostname ending in `.local` must be rejected; the error is pinned by a Jest snapshot.
it(`should block .local hostnames`, () => __awaiter(void 0, void 0, void 0, function* () {
    const localUrl = `http://printer.local/status`;
    const pendingPreview = (0, index_1.getLinkPreview)(localUrl);
    yield expect(pendingPreview).rejects.toThrowErrorMatchingSnapshot();
}));
// A nip.io hostname that embeds an IP address must be rejected;
// the error is pinned by a Jest snapshot.
it(`should block nip.io wildcard hostnames`, () => __awaiter(void 0, void 0, void 0, function* () {
    const wildcardUrl = `http://169.254.169.254.nip.io/latest/meta-data/iam/security-credentials/`;
    const pendingPreview = (0, index_1.getLinkPreview)(wildcardUrl);
    yield expect(pendingPreview).rejects.toThrowErrorMatchingSnapshot();
}));
// An sslip.io hostname that embeds an IP address must be rejected;
// the error is pinned by a Jest snapshot.
it(`should block sslip.io wildcard hostnames`, () => __awaiter(void 0, void 0, void 0, function* () {
    const wildcardUrl = `http://127.0.0.1.sslip.io/`;
    const pendingPreview = (0, index_1.getLinkPreview)(wildcardUrl);
    yield expect(pendingPreview).rejects.toThrowErrorMatchingSnapshot();
}));
// An empty input string must reject; the error is pinned by a Jest snapshot.
it(`should handle empty strings gracefully`, () => __awaiter(void 0, void 0, void 0, function* () {
    const pendingPreview = (0, index_1.getLinkPreview)(``);
    yield expect(pendingPreview).rejects.toThrowErrorMatchingSnapshot();
}));
// Skipped: previews a YouTube URL with the `proxyUrl` option set and expects the same
// preview fields as a direct request.
it.skip(`should handle a proxy url option`, () => __awaiter(void 0, void 0, void 0, function* () {
    const videoUrl = `https://www.youtube.com/watch?v=wuClZjOdT30`;
    const proxiedOptions = {
        proxyUrl: `https://cors-anywhere.herokuapp.com/`,
        // The Origin header is required by cors-anywhere.
        headers: {
            Origin: `http://localhost:8000`,
            "Accept-Language": `en-US`,
        },
    };
    const preview = yield (0, index_1.getLinkPreview)(videoUrl, proxiedOptions);
    expect(preview.url).toEqual(videoUrl);
    expect(preview.siteName).toEqual(`YouTube`);
    expect(preview.title).toEqual(`Geography Now! Germany`);
    expect(preview.description).toBeTruthy();
    expect(preview.mediaType).toEqual(`video.other`);
    expect(preview.images.length).toEqual(1);
    expect(preview.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
    expect(preview.videos.length).toEqual(0);
    expect(preview.favicons[0]).not.toBe(``);
    expect(preview.contentType.toLowerCase()).toEqual(`text/html`);
}));
// Expects getLinkPreview, called without a `timeout` option, to reject with
// "Request timeout" for a link that (per the test title) never finishes loading.
it("should timeout (default 3s) with infinite loading link", () => __awaiter(void 0, void 0, void 0, function* () {
    // The only assertion lives in the catch branch, so require that it actually runs;
    // without this the test would pass vacuously if the call resolved.
    expect.assertions(1);
    try {
        yield (0, index_1.getLinkPreview)(`https://www.gamestop.com/video-games/pc-gaming/components/cooling/products/hyper-212-rgb-black-edition-fan/185243.html?gclid=Cj0KCQjwraqHBhDsARIsAKuGZeECDlqkF2cxpcuS0xRxQmrv5BxFawWS_B51kiqehPf64_KlO0oyunsaAhn5EALw_wcB&gclsrc=aw.ds`);
    }
    catch (e) {
        expect(e.message).toEqual("Request timeout");
    }
}));
// Expects getLinkPreview, called with `timeout: 1000`, to reject with
// "Request timeout" for a link that (per the test title) never finishes loading.
it("should timeout (custom 1s) with infinite loading link", () => __awaiter(void 0, void 0, void 0, function* () {
    // The only assertion lives in the catch branch, so require that it actually runs;
    // without this the test would pass vacuously if the call resolved.
    expect.assertions(1);
    try {
        yield (0, index_1.getLinkPreview)(`https://www.gamestop.com/video-games/pc-gaming/components/cooling/products/hyper-212-rgb-black-edition-fan/185243.html?gclid=Cj0KCQjwraqHBhDsARIsAKuGZeECDlqkF2cxpcuS0xRxQmrv5BxFawWS_B51kiqehPf64_KlO0oyunsaAhn5EALw_wcB&gclsrc=aw.ds`, {
            timeout: 1000,
        });
    }
    catch (e) {
        expect(e.message).toEqual("Request timeout");
    }
}));
// Expects a request made with `followRedirects: "error"` to reject with a message
// containing "UnexpectedRedirect".
it(`should handle followRedirects option is error`, () => __awaiter(void 0, void 0, void 0, function* () {
    // The only assertion lives in the catch branch, so require that it actually runs;
    // without this the test would pass vacuously if the call resolved.
    expect.assertions(1);
    try {
        yield (0, index_1.getLinkPreview)(`http://google.com/`, { followRedirects: `error` });
    }
    catch (e) {
        expect(e.message).toContain(`UnexpectedRedirect`);
    }
}));
// Expects `followRedirects: "manual"` without a `handleRedirects` callback to reject
// with the library's explanatory error message.
it(`should handle followRedirects option is manual but handleRedirects was not provided`, () => __awaiter(void 0, void 0, void 0, function* () {
    // The only assertion lives in the catch branch, so require that it actually runs;
    // without this the test would pass vacuously if the call resolved.
    expect.assertions(1);
    try {
        yield (0, index_1.getLinkPreview)(`http://google.com/`, { followRedirects: `manual` });
    }
    catch (e) {
        expect(e.message).toEqual(`link-preview-js followRedirects is set to manual, but no handleRedirects function was provided`);
    }
}));
// Supplies a `handleRedirects` callback that approves only the redirect to
// http://www.google.com/ and checks the preview of the redirect target.
it(`should handle followRedirects option is manual with handleRedirects function`, () => __awaiter(void 0, void 0, void 0, function* () {
    const allowedTarget = `http://www.google.com/`;
    // Approve the redirect only when it points at the allowed target.
    const approveRedirect = (_baseURL, forwardedURL) => forwardedURL === allowedTarget;
    const preview = yield (0, index_1.getLinkPreview)(`http://google.com/`, {
        followRedirects: `manual`,
        handleRedirects: approveRedirect,
    });
    expect(preview.contentType).toEqual(`text/html`);
    expect(preview.url).toEqual(allowedTarget);
    expect(preview.mediaType).toEqual(`website`);
}));
// Uses the `onResponse` hook to overwrite `siteName` and `description` and checks that
// the overridden values are what getLinkPreview returns.
it("should handle override response body using onResponse option", () => __awaiter(void 0, void 0, void 0, function* () {
    let firstParagraphText;
    // Rewrites the result using text pulled from the first <p> of the parsed document.
    const overrideResult = (result, doc) => {
        const rawLines = doc("p").first().text().split("\n");
        // Trim each line and collapse the paragraph onto a single line.
        firstParagraphText = rawLines.map((line) => line.trim()).join(" ");
        result.siteName = `SiteName has been overridden`;
        result.description = firstParagraphText;
        return result;
    };
    const preview = yield (0, index_1.getLinkPreview)(`https://www.example.com/`, {
        onResponse: overrideResult,
    });
    expect(preview.siteName).toEqual("SiteName has been overridden");
    expect(preview.description).toEqual(firstParagraphText);
}));
// Previews an article (following redirects) and checks its fields, including the URL
// of the first extracted video.
it("should handle video tags without type or secure_url tags", () => __awaiter(void 0, void 0, void 0, function* () {
    const articleUrl = `https://newpathtitle.com/falling-markets-how-to-stop-buyer-from-getting-out/`;
    const preview = yield (0, index_1.getLinkPreview)(articleUrl, { followRedirects: `follow` });
    expect(preview.siteName).toEqual(`New Path Title`);
    expect(preview.title).toEqual(`Falling Markets: How To Stop A Buyer From Getting Out | New Path Title`);
    expect(preview.description).toBeTruthy();
    expect(preview.mediaType).toEqual(`article`);
    expect(preview.images.length).toBeGreaterThan(0);
    expect(preview.videos.length).toBeGreaterThan(0);
    expect(preview.videos[0].url).toEqual(`https://www.youtube.com/embed/nqNXjxpAPkU`);
    expect(preview.favicons.length).toBeGreaterThan(0);
    expect(preview.contentType.toLowerCase()).toEqual(`text/html`);
}));
});
// Feeds the stored sample response (sampleResponse.json) to getPreviewFromContent and
// checks the fields of the resulting preview.
describe(`#getPreviewFromContent`, () => {
    it(`Basic parsing`, () => __awaiter(void 0, void 0, void 0, function* () {
        const storedResponse = sampleResponse_json_1.default;
        const preview = yield (0, index_1.getPreviewFromContent)(storedResponse);
        expect(preview.url).toEqual(`https://www.youtube.com/watch?v=wuClZjOdT30`);
        expect(preview.siteName).toEqual(`YouTube`);
        expect(preview.title).toEqual(`Geography Now! Germany`);
        expect(preview.description).toBeTruthy();
        expect(preview.mediaType).toEqual(`video.other`);
        expect(preview.images.length).toEqual(1);
        expect(preview.images[0]).toEqual(`https://i.ytimg.com/vi/wuClZjOdT30/maxresdefault.jpg`);
        expect(preview.videos.length).toEqual(0);
        expect(preview.favicons[0]).not.toBe(``);
        expect(preview.contentType.toLowerCase()).toEqual(`text/html`);
    }));
});