UNPKG

node-web-crawler

Version:

Node Web Crawler is a web spider written in Node.js. It gives you the full power of jQuery on the server to parse a large number of pages asynchronously as they are downloaded. Scraping should be simple and fun!

github.com/jaykshah/node-web-crawler

jaykshah/node-web-crawler

46 lines (41 loc) • 1.49 kB

JavaScript

'use strict';

/**
 * Mocha suite exercising how the crawler reports links and final URIs when
 * jsdom is supplied as the `jquery` option.
 *
 * The tests issue real HTTP requests against `httpbinHost`, so a server must
 * be listening there before the suite runs.
 * NOTE(review): the endpoints used (`/links/:n/:offset`, `/redirect-to`) look
 * like httpbin routes -- confirm the local server is httpbin-compatible.
 */

var Crawler = require('../lib/node-web-crawler');
var expect = require('chai').expect;
var _ = require('lodash');
var jsdom = require('jsdom');

// Host (and port) of the HTTP test server every request below is sent to.
var httpbinHost = 'localhost:8000';

// Crawler under test; re-created before each test by the `beforeEach` hook.
var c;

describe('Links', function() {

    beforeEach(function() {
        c = new Crawler({
            forceUTF8: true,
            jquery: jsdom
        });
    });

    it('should resolved links to absolute urls with jsdom', function(done) {
        c.queue([{
            uri : 'http://'+httpbinHost+'/links/3/0',
            callback: function(error, result, $) {
                // Check the error first: if the request failed, the failure is
                // reported as an assertion on `error` rather than as an
                // unrelated TypeError raised while using `$` below.
                //noinspection BadExpressionStatementJS
                expect(error).to.be.null;

                var links = _.map($('a'), function(a) {
                    return a.href;
                });

                // Both links should be resolved to absolute URLs.
                expect(links[0]).to.equal('http://'+httpbinHost+'/links/3/1');
                expect(links[1]).to.equal('http://'+httpbinHost+'/links/3/2');
                done();
            }
        }]);
    });

    it('should resolved links to absolute urls after redirect with jsdom', function(done) {
        c.queue([{
            uri : 'http://'+httpbinHost+'/redirect-to?url=http://example.com/',
            callback: function(error, result) {
                // Check the error first so a failed request is not masked by a
                // TypeError from reading `result.uri` on a missing result.
                //noinspection BadExpressionStatementJS
                expect(error).to.be.null;

                // The reported URI is expected to be the redirect target, not
                // the originally queued URL.
                expect(result.uri).to.equal('http://example.com/');
                done();
            }
        }]);
    });
});