node-web-crawler
Node Web Crawler is a web spider written with Node.js. It gives you the full power of jQuery on the server to parse a large number of pages as they are downloaded, asynchronously. Scraping should be simple and fun!
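A minimal usage sketch, assuming the package is installed under the name node-web-crawler; the options shown (maxConnections, callback, onDrain, queue) mirror those exercised in the test file that follows.

var Crawler = require('node-web-crawler');

var crawler = new Crawler({
  maxConnections: 10,    // parallel download limit
  callback: function(error, result, $) {
    // $ is a server-side jQuery bound to the downloaded page
    if (error) {
      return console.error(error);
    }
    console.log($('title').text());
  },
  onDrain: function() {
    // fired once the queue is empty
    console.log('crawl finished');
  }
});

crawler.queue('http://www.google.com');

The tests below exercise each of these options end to end.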
var Crawler = require('../lib/node-web-crawler');
var expect = require('chai').expect;
var httpbinHost = 'localhost:8000';
var sinon = require('sinon');
var url = require('url');
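// Shared crawler instance and sinon spy, reset after each test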
var c, spy;
describe('Simple test', function() {
  afterEach(function() {
    c = {};
    spy = {};
  });
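
  // First README example: fetch a single page and assert the response body
  // is a string; onDrain signals mocha's done() once the queue empties.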
  it('should run the first readme example', function(done) {
    c = new Crawler({
      maxConnections: 10,
      onDrain: function() {
        done();
      },
      callback: function(error, result) {
        expect(typeof result.body).to.equal('string');
      }
    });
    c.queue('http://google.com');
  });
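
  // Second README example: crawl a local httpbin links fixture, resolve each
  // anchor's href against the page URI and re-queue it; the spy checks that
  // queue() runs exactly twice (the seed URL plus the one discovered link).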
  it('should run the readme examples', function(done) {
    c = new Crawler({
      maxConnections: 10,
      onDrain: function() {
        expect(spy.calledTwice).to.be.true;
        done();
      },
      callback: function(error, result, $) {
        var baseUrl = result.uri;
        $('a').each(function(index, a) {
          var toQueueUrl = url.resolve(baseUrl, $(a).attr('href'));
          c.queue(toQueueUrl);
        });
      }
    });
    spy = sinon.spy(c, 'queue');
    c.queue('http://' + httpbinHost + '/links/1/1');
  });
  it('should run with an array queue', function(done) {
    c = new Crawler();
    c.queue([{
      uri: 'http://www.google.com',
      jquery: true,
      callback: function(error, result, $) {
        expect($).not.to.be.null;
        expect(typeof result.body).to.equal('string');
        done();
      }
    }]);
  });
});