url-metadata
Version:
Request a url and scrape the metadata from its HTML using Node.js or the browser.
71 lines (64 loc) • 2.4 kB
JavaScript
const urlMetadata = require('./../index')
// NOTE: we may need to start just mocking the responses here,
// as more and more sites convert to UTF-8
// header: 'Content-Type': 'text/html; charset=EUC-JP'
test('EUC-JP charset auto-detected from `Content-Type` header', async () => {
  // Live site that is expected to answer with an EUC-JP `Content-Type`
  // response header (this test performs a real network request).
  const url = 'https://irobot.csse.muroran-it.ac.jp/'
  const expectedTitle = '認知ロボティクス研究室 - iRobotLab'

  // Deliberately no try/catch: a rejected promise or a failed assertion fails
  // the async test directly, so Jest reports the real error instead of a
  // secondary "expected undefined" failure raised from a catch block.
  const metadata = await urlMetadata(url)

  // Test decoding: the Japanese title only matches when the response body
  // was decoded with the right charset.
  expect(metadata.title).toContain(expectedTitle)
})
// ex: <META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=EUC-JP">
test('EUC-JP charset auto-detected in meta tag', async () => {
  // Live page that is expected to declare its EUC-JP charset through an
  // http-equiv meta tag (this test performs a real network request).
  const url = 'https://plaza.umin.ac.jp/GHDNet/97/h904wnn.html'
  const expectedTitle = 'h904wnn'

  // Deliberately no try/catch: a rejected promise or a failed assertion fails
  // the async test directly, so Jest reports the real error instead of a
  // secondary "expected undefined" failure raised from a catch block.
  const metadata = await urlMetadata(url)

  // Test decoding:
  expect(metadata.title).toContain(expectedTitle)
})
// ex: <meta charset="windows-1251">
test('charset=windows-1251 (Cyrillic) auto-detected in meta tag', async () => {
  // Live forum page used as a windows-1251 sample (this test performs a real
  // network request).
  const url = 'https://forum.ru-board.com/'
  const expectedTitle = 'Компьютерный форум Ru.Board'

  // Deliberately no try/catch: a rejected promise or a failed assertion fails
  // the async test directly, so Jest reports the real error instead of a
  // secondary "expected undefined" failure raised from a catch block.
  const metadata = await urlMetadata(url)

  // Precondition on the sample page itself: if this breaks, the test is
  // broken and we need a different example url.
  expect(metadata['Content-Type']).toBe('text/html; charset=windows-1251')

  // Test decoding: the Cyrillic title must match exactly.
  expect(metadata.title).toBe(expectedTitle)
})
test('User-specified opts.decode=windows-1251 (Cyrillic)', async () => {
  // Live forum page used as a windows-1251 sample (this test performs a real
  // network request).
  const url = 'https://forum.ru-board.com/'
  const expectedTitle = 'Компьютерный форум Ru.Board'

  // Deliberately no try/catch: a rejected promise or a failed assertion fails
  // the async test directly, so Jest reports the real error instead of a
  // secondary "expected undefined" failure raised from a catch block.
  // The charset is passed explicitly through the `decode` option here.
  const metadata = await urlMetadata(url, { decode: 'windows-1251' })

  // Precondition on the sample page itself: if this breaks, the test is
  // broken and we need a different example url.
  expect(metadata['Content-Type']).toBe('text/html; charset=windows-1251')

  // Test decoding: the Cyrillic title must match exactly.
  expect(metadata.title).toBe(expectedTitle)
})
test('throws error with bad opts.decode', async () => {
  const url = 'https://www.npmjs.com/package/url-metadata'

  // Capture the failure (sync throw or promise rejection) and assert on it
  // OUTSIDE the try/catch. Asserting inside the `try` made the test
  // impossible to fail: a failed expectation there was itself caught and
  // then satisfied the `toBeDefined()` check in the `catch` block.
  let caughtError
  try {
    await urlMetadata(url, { decode: 'FOO-BAR' })
  } catch (err) {
    caughtError = err
  }

  // If the call resolved, nothing was caught and this assertion fails.
  expect(caughtError).toBeDefined()
})