UNPKG

node-web-crawler

Version:

Node Web Crawler is a web spider written in Node.js. It gives you the full power of jQuery on the server to parse a large number of pages asynchronously, as they are downloaded. Scraping should be simple and fun!

133 lines (125 loc) 4.73 kB
'use strict';

var Crawler = require('../lib/node-web-crawler');
var expect = require('chai').expect;
var jsdom = require('jsdom');

// Host of the HTTP test server every network test below talks to.
// NOTE(review): the /delay/<n> and /status/<code> paths look like httpbin
// endpoints, so presumably an httpbin-compatible server must be listening
// here before the suite runs -- confirm against the project's test setup.
var httpbinHost = 'localhost:8000';

// Deliberately malformed markup (the <p> is closed while a <div> is still
// open) fed to the crawler through the `html` option instead of a URI.
var malformedHtml = '<html><p>hello <div>dude</p></html>';

/**
 * Builds the absolute URL for a path on the test server.
 *
 * @param {string} path - Path starting with '/', e.g. '/status/404'.
 * @returns {string} URL of the form http://<httpbinHost><path>.
 */
function testUrl(path) {
    return 'http://' + httpbinHost + path;
}

/**
 * Builds a mocha test body that queues /status/<statusCode> on the given
 * crawler and asserts that the callback receives no error and a response
 * carrying exactly that status code.
 *
 * @param {Object} crawler - Crawler instance exposing `queue(options)`.
 * @param {number} statusCode - HTTP status the server is asked to answer with.
 * @returns {function(Function)} Test function taking mocha's `done` callback.
 */
function expectStatusWithoutError(crawler, statusCode) {
    return function(done) {
        crawler.queue({
            uri : testUrl('/status/' + statusCode),
            callback : function(error, response) {
                expect(error).to.be.null;
                expect(response.statusCode).to.equal(statusCode);
                done();
            }
        });
    };
}

describe('Errors', function() {

    describe('timeout', function() {
        // The option key is spelled `jQuery`, matching every other use in
        // this file; the previous lowercase `jquery` did not match it.
        // NOTE(review): assumes `timeout`/`retryTimeout` are milliseconds and
        // `retries` counts the extra attempts -- confirm against the crawler.
        var c = new Crawler({
            timeout : 1500,
            retryTimeout : 1000,
            retries : 2,
            jQuery : false
        });

        it('should return a timeout error after ~5sec', function(done) {
            // override default mocha test timeout of 2000ms
            // (needs a non-arrow function so `this` is the mocha context)
            this.timeout(10000);

            c.queue({
                // The server is asked to delay far longer than the crawler's
                // timeout, so the request is expected to end in an error.
                uri : testUrl('/delay/15'),
                callback : function(error, response) {
                    // chai property assertions are bare expression statements
                    //noinspection BadExpressionStatementJS
                    expect(error).not.to.be.null;
                    expect(response).to.be.undefined;
                    done();
                }
            });
        });

        it('should retry after a first timeout', function(done) {
            // override default mocha test timeout of 2000ms
            this.timeout(15000);

            c.queue({
                uri : testUrl('/delay/1'),
                callback : function(error, response) {
                    expect(error).to.be.null;
                    expect(response.body).to.be.ok;
                    done();
                }
            });
        });
    });

    describe('error status code', function() {
        var c = new Crawler({
            jQuery : false
        });

        // Non-2xx statuses must reach the callback as a normal response,
        // not as an error.
        it('should not return an error on status code 400 (Bad Request)',
            expectStatusWithoutError(c, 400));

        it('should not return an error on status code 401 (Unauthorized)',
            expectStatusWithoutError(c, 401));

        it('should not return an error on status code 403 (Forbidden)',
            expectStatusWithoutError(c, 403));

        it('should not return an error on a 404',
            expectStatusWithoutError(c, 404));

        it('should not return an error on a 500',
            expectStatusWithoutError(c, 500));

        it('should not failed on empty response', function(done) {
            c.queue({
                uri : testUrl('/status/204'),
                callback : function(error) {
                    expect(error).to.be.null;
                    done();
                }
            });
        });

        it('should not failed on a malformed html if jquery is false', function(done) {
            c.queue({
                html : malformedHtml,
                callback : function(error, response) {
                    expect(error).to.be.null;
                    expect(response).not.to.be.null;
                    done();
                }
            });
        });

        it('should not return an error on a malformed html if jQuery is jsdom', function(done) {
            c.queue({
                html : malformedHtml,
                // Per-request override of the crawler-level `jQuery : false`.
                jQuery : jsdom,
                callback : function(error, response) {
                    expect(error).to.be.null;
                    expect(response).not.to.be.undefined;
                    done();
                }
            });
        });
    });
});