UNPKG

article-parser

Version:

To extract main article from given URL

demos.pwshub.com/article-parser

ndaidong/article-parser

65 lines (56 loc) • 2 kB

JavaScript

// retrieve.test
/* eslint-env jest */

import nock from 'nock'

import retrieve from './retrieve.js'

/**
 * Split an absolute URL into the two pieces used to register a nock
 * interceptor.
 *
 * @param {string} url - absolute URL, e.g. 'https://some.where/good/page'
 * @returns {{ baseUrl: string, path: string }} `baseUrl` is
 *   `<protocol>//<host>`; `path` is the URL's pathname (no query string).
 */
const parseUrl = (url) => {
  const parsed = new URL(url)
  return {
    baseUrl: `${parsed.protocol}//${parsed.host}`,
    path: parsed.pathname
  }
}

describe('test retrieve() method', () => {
  // Drop every registered interceptor after each test, even when an
  // assertion fails, so unused mocks cannot leak into the next test.
  afterEach(() => {
    nock.cleanAll()
  })

  test('test retrieve with bad status code', async () => {
    const url = 'https://some.where/bad/page'
    const { baseUrl, path } = parseUrl(url)
    nock(baseUrl).get(path).reply(500, 'Error 500')
    // The `.rejects` assertion is asynchronous: it must be awaited,
    // otherwise the test finishes before the expectation is checked.
    await expect(retrieve(url)).rejects.toThrow(new Error('Request failed with error code 500'))
  })

  test('test retrieve from good source', async () => {
    const url = 'https://some.where/good/page'
    const { baseUrl, path } = parseUrl(url)
    nock(baseUrl).get(path).reply(200, '<div>this is content</div>', {
      'Content-Type': 'text/html'
    })
    const html = await retrieve(url)
    expect(html).toEqual('<div>this is content</div>')
  })

  test('test retrieve from good source with \\r\\n', async () => {
    const url = 'https://some.where/good/page'
    const { baseUrl, path } = parseUrl(url)
    // The body is wrapped in CR/LF noise; the expected result has none of it.
    nock(baseUrl).get(path).reply(200, '\n\r\r\n\n<div>this is content</div>\n\r\r\n\n', {
      'Content-Type': 'text/html'
    })
    const html = await retrieve(url)
    expect(html).toEqual('<div>this is content</div>')
  })

  test('test retrieve using proxy', async () => {
    const url = 'https://some.where/good/source-with-proxy'
    const { baseUrl, path } = parseUrl(url)
    // A direct request to the page would get a non-HTML payload ...
    nock(baseUrl).get(path).reply(200, 'something bad', {
      'Content-Type': 'bad/thing'
    })
    // ... while the proxy endpoint, called with the URL-encoded target,
    // serves the HTML the test expects back.
    nock('https://proxy-server.com')
      .get('/api/proxy?url=https%3A%2F%2Fsome.where%2Fgood%2Fsource-with-proxy')
      .reply(200, '<div>this is content</div>', {
        'Content-Type': 'text/html'
      })
    const html = await retrieve(url, {
      proxy: {
        target: 'https://proxy-server.com/api/proxy?url='
      }
    })
    expect(html).toEqual('<div>this is content</div>')
  })
})