url-metadata
Version:
Request a url and scrape the metadata from its HTML using Node.js or the browser.
39 lines (36 loc) • 1.47 kB
JavaScript
const urlMetadata = require('./../index')
test('handle <meta> tags w itemprop in <head>, ignore <meta> tags w itemprop in <body>', async () => {
// StackOverflow.com pages have overloaded meta tags like the one in the <head> of this ex:
const html = `
<html lang="en">
<head>
<meta name="twitter:description" property="og:description" itemprop="description" content="this is a sample description" />
</head>
<body>
<div class="box less-space hero-image-wrapper" itemprop="image" itemscope itemtype="https://schema.org/ImageObject">
<img src="https://cdn.mos.cms.futurecdn.net/GqMegaDvjg6v4hVKvJ4a83-320-80.png">
<meta itemprop="url" content="https://cdn.mos.cms.futurecdn.net/GqMegaDvjg6v4hVKvJ4a83.png">
<meta itemprop="description" content="this is a sample image"
<meta itemprop="height" content="600">
<meta itemprop="width" content="338">
</div>
</body>
</html>
`
const response = new Response(html, {
headers: {
'Content-Type': 'text/html'
}
})
try {
const metadata = await urlMetadata(null, { parseResponseObject: response })
expect(metadata.url).toBe('') // ignores <meta> tag w `itemprop="url"` in body
const correct = 'this is a sample description'
expect(metadata.description).toBe(correct)
expect(metadata['og:description']).toBe(correct)
expect(metadata['twitter:description']).toBe(correct)
} catch (e) {
expect(e).toBe(undefined)
}
})