crawler
Crawler is a web spider written in Node.js. It gives you the full power of jQuery on the server to parse a large number of pages asynchronously as they are downloaded.
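A minimal usage sketch of the API this test file exercises, assuming the published `crawler` npm package; the `maxConnections`, `jquery`, and `callback(err, res, done)` options mirror the ones used in the tests, and the URL is a placeholder:

var Crawler = require('crawler');

var crawler = new Crawler({
    maxConnections: 10,
    // Called once per downloaded page. When the jquery option is enabled,
    // res.$ is a server-side jQuery instance bound to the page body.
    callback: function (error, res, done) {
        if (error) {
            console.error(error);
        } else {
            console.log(res.$('title').text());
        }
        done(); // tell the crawler this page is finished
    }
});

// queue() accepts a single URL, an options object, or an array of either.
crawler.queue('http://example.com/');

The Mocha test file below exercises the same queue() and callback API, against both a nock-stubbed host and github.com.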
/*jshint expr:true */
'use strict';
var Crawler = require('../lib/crawler');
var expect = require('chai').expect;
var sinon = require('sinon');
var nock = require('nock');
var c, spy;
describe('Simple test', function () {
    before(function () {
        nock.cleanAll();
    });
    beforeEach(function () {
        // Stub every request whose path contains "status" so these tests
        // never hit a real server at nockhost.
        nock('http://nockhost').get(uri => uri.indexOf('status') >= 0).times(20).reply(200, 'Yes');
        c = new Crawler({ maxConnections: 10, jquery: false });
    });
    afterEach(function () {
        c = {};
        spy = {};
    });
    it('should run the first readme example', function (done) {
        // Access to GitHub might be slow
        this.timeout(6000);
        c.queue({
            uri: 'http://github.com',
            callback: function (err, result, done) {
                expect(err).to.be.null;
                expect(typeof result.body).to.equal('string');
                done();
            }
        });
        c.on('drain', done);
    });
    it('should run the readme examples', function (done) {
        c = new Crawler({
            maxConnections: 10,
            jquery: false,
            callback: function (err, result, done) {
                expect(err).to.be.null;
                done();
            }
        });
        c.on('drain', function () {
            // Both queued URLs should have gone through the spied queue() call.
            expect(spy.calledTwice).to.be.true;
            done();
        });
        spy = sinon.spy(c, 'queue');
        c.queue('http://nockhost/status/200');
        c.queue('http://nockhost/status/200');
    });
    it('should run with an array queue', function (done) {
        this.timeout(6000);
        c.queue([{
            uri: 'http://www.github.com',
            jquery: true,
            callback: function (err, result, done) {
                expect(err).to.be.null;
                expect(result.$).not.to.be.null;
                expect(typeof result.body).to.equal('string');
                done();
            }
        }]);
        c.on('drain', done);
    });
});