UNPKG

url-metadata

Version:

Request a url and scrape the metadata from its HTML using Node.js or the browser.

122 lines (107 loc) 3.9 kB
const urlMetadata = require('./../index')

/**
 * Wrap an HTML string in a fetch `Response` labelled as `text/html`.
 * The result is what the tests below hand to the `parseResponseObject` option.
 *
 * @param {string} html - markup to use as the response body
 * @returns {Response} response whose body is `html`
 */
function htmlResponse (html) {
  return new Response(html, {
    headers: { 'Content-Type': 'text/html' }
  })
}

// Assertions are deliberately NOT wrapped in try/catch: an async test that
// rejects already fails, and Jest then reports the real expected/received
// diff instead of a generic "expected Error to be undefined".
test('option: `parseResponseObject` from html string', async () => {
  const html = `
    <!DOCTYPE html>
    <html lang="en">
    <head>
      <meta charset="utf-8">
      <title>Metadata page</title>
      <meta name="author" content="foobar">
      <meta name="keywords" content="HTML, CSS, JavaScript">
    </head>
    <body>
      <h1>Metadata page</h1>
    </body>
    </html>
  `

  // pass null `url` param & response object as option
  const metadata = await urlMetadata(null, {
    parseResponseObject: htmlResponse(html)
  })

  expect(metadata.url).toBe('')
  expect(metadata.title).toBe('Metadata page')
  expect(metadata.lang).toBe('en')
  expect(metadata.charset).toBe('utf-8')
  expect(metadata.author).toBe('foobar')
})

// Byte limit passed as the `size` option and echoed in the expected message.
const size = 1000

// NOTE: this test targets a live URL, so it presumably needs network access.
test('option: max `size` 1000 bytes aborts call & errors', async () => {
  const url = 'https://google.com'

  // `.rejects` fails the test when the promise resolves, so a call that
  // unexpectedly succeeds can never pass silently.
  await expect(urlMetadata(url, { size })).rejects.toThrow(`over limit: ${size}`)
})

test('options: `includeResponseBody`, custom `headers`, `descriptionLength`, `ensureSecureImageRequest`', async () => {
  // The description is longer than 20 characters so truncation is observable;
  // image and favicon URLs mix `http://`, protocol-relative `//` and `https://`.
  const html = `
    <!DOCTYPE html>
    <html lang="en">
    <head>
      <meta charset="utf-8">
      <title>Metadata page</title>
      <meta name="author" content="foobar">
      <meta name="keywords" content="HTML, CSS, JavaScript">
      <meta name="description" content="Lorem Ipsum is simply dummy text of the printing and typesetting industry.
Lorem Ipsum has been">
      <link rel="icon" type="image/png" href="http://foo.com/foo.png" sizes="96x96" />
      <link rel="icon" type="image/svg+xml" href="//foo.com/favicon.svg" />
      <link rel="shortcut icon" href="https://foo.com/favicon.ico" />
    </head>
    <body>
      <h1>Metadata page</h1>
      <img src="http://foo.com/foo.jpg" alt="bar" title="baz" />
      <img src="//foo.com/bar.png" alt="foobar" title="foobaz" />
    </body>
    </html>
  `

  // `requestHeaders` is supplied alongside the other options, but nothing
  // below asserts on it; the test only checks that the call succeeds with it.
  const metadata = await urlMetadata(null, {
    parseResponseObject: htmlResponse(html),
    includeResponseBody: true,
    requestHeaders: {
      'User-Agent': 'foo',
      From: 'bar@bar.com'
    },
    descriptionLength: 20,
    ensureSecureImageRequest: true
  })

  expect(metadata.responseBody).toContain('<!DOCTYPE html>')
  expect(metadata.description.length).toBe(20)

  // test imgTags for https:// upgrade
  expect(metadata.imgTags.length).toBe(2)
  expect(metadata.imgTags[0].src).toBe('https://foo.com/foo.jpg')
  expect(metadata.imgTags[0].alt).toBe('bar')
  expect(metadata.imgTags[0].title).toBe('baz')
  expect(metadata.imgTags[1].src).toBe('https://foo.com/bar.png')

  // test favicons for https:// upgrade
  expect(metadata.favicons.length).toBe(3)
  expect(metadata.favicons[0].href).toBe('https://foo.com/foo.png')
  expect(metadata.favicons[0].rel).toBe('icon')
  expect(metadata.favicons[0].type).toBe('image/png')
  expect(metadata.favicons[0].sizes).toBe('96x96')
  expect(metadata.favicons[1].href).toBe('https://foo.com/favicon.svg')
  expect(metadata.favicons[1].rel).toBe('icon')
  expect(metadata.favicons[1].type).toBe('image/svg+xml')
  expect(metadata.favicons[1].sizes).toBe(undefined)
  expect(metadata.favicons[2].href).toBe('https://foo.com/favicon.ico')
  expect(metadata.favicons[2].rel).toBe('shortcut icon')
  expect(metadata.favicons[2].type).toBe(undefined)
  expect(metadata.favicons[2].sizes).toBe(undefined)
})