node-web-crawler
Version:
Node Web Crawler is a web spider written in Node.js. It gives you the full power of jQuery on the server to parse a large number of pages as they are downloaded, asynchronously. Scraping should be simple and fun!
56 lines (49 loc) • 1.78 kB
JavaScript
;
var Crawler = require('../lib/node-web-crawler');
var expect = require('chai').expect;
var sinon = require('sinon');
// Host and port the test URLs are built from ('http://' + httpbinHost).
// NOTE(review): presumably a locally running httpbin-style server — confirm against the test setup.
var httpbinHost = 'localhost:8000';
// Crawler instance for the current test; assigned inside each test and reset in the afterEach hooks.
var c;
describe('Cache features tests', function() {
describe('Cache', function() {
  // Replace the shared crawler reference with an empty object after every test.
  afterEach(function () {
    c = {};
  });

  // Currently skipped. Queues the same URL four times on a crawler created
  // with `cache: true` and, once the queue drains, asserts that the spied
  // `_buildHttpRequest` method was called exactly once.
  it.skip('should crawl one url', function (done) {
    var url = 'http://' + httpbinHost;
    var spy;

    // Runs when the crawler reports that its queue has drained.
    function assertSingleRequest() {
      //noinspection BadExpressionStatementJS
      expect(spy.calledOnce).to.be.true;
      done();
    }

    // Runs for every crawled page: no error and an HTTP 200 are expected.
    function assertPageOk(error, result) {
      expect(error).to.be.null;
      expect(result.statusCode).to.equal(200);
    }

    c = new Crawler({
      maxConnections: 1,
      cache: true,
      jquery: false,
      onDrain: assertSingleRequest,
      callback: assertPageOk
    });

    // The spy has to be installed after construction and before queueing.
    spy = sinon.spy(c, '_buildHttpRequest');
    c.queue([url, url, url, url]);
  });
});
describe('Skip Duplicate active', function() {
  // Replace the shared crawler reference with an empty object after every test.
  afterEach(function () {
    c = {};
  });

  // A URL queued a single time must still be fetched when `skipDuplicates`
  // is enabled: the callback has to fire with no error and an HTTP 200.
  it('should not skip one single url', function (done) {
    var targetUrl = 'http://' + httpbinHost + '/status/200';
    var options = {
      jquery: false,
      skipDuplicates: true,
      callback: function (err, response) {
        expect(err).to.be.null;
        expect(response.statusCode).to.equal(200);
        done();
      }
    };

    c = new Crawler(options);
    c.queue(targetUrl);
  });

  // TODO: add the missing test 'should skip previous crawled urls'.
});
});