scraperjs
Version:
A complete and versatile web scraper.
84 lines (82 loc) • 2.01 kB
JavaScript
var cheerio = require('cheerio'),
AbstractScraper = require('./AbstractScraper');
/**
 * A scraper for static content: the page body is parsed once and then
 * queried through a jQuery-like function.
 * This version uses cheerio {@link https://github.com/cheeriojs/cheerio}.
 *
 * @constructor
 * @extends {AbstractScraper}
 */
var StaticScraper = function() {
  // Run the parent constructor so inherited instance state is set up.
  AbstractScraper.call(this);
  /**
   * jQuery-like function for the loaded document. It is null until
   * loadBody assigns the value returned by cheerio.load.
   *
   * @type {?function}
   * @private
   */
  this.$ = null;
};
// Inherit from AbstractScraper. Object.create alone would leave
// `constructor` resolving to AbstractScraper through the prototype chain,
// so it is restored here as a non-enumerable property (matching how the
// engine defines it on ordinary functions).
StaticScraper.prototype = Object.create(AbstractScraper.prototype, {
  constructor: {
    value: StaticScraper,
    enumerable: false,
    writable: true,
    configurable: true
  }
});
/**
 * Parses `this.body` with cheerio and keeps the resulting jQuery-like
 * function in `this.$`, then signals completion.
 *
 * NOTE(review): `this.body` is not assigned anywhere in this file; it is
 * presumably populated by AbstractScraper before this runs - confirm.
 *
 * @param {!function()} done Invoked with no arguments, synchronously,
 *   once the body has been parsed.
 * @return {!AbstractScraper} This scraper.
 * @override
 * @inheritDoc
 */
StaticScraper.prototype.loadBody = function(done) {
  var scraper = this;
  scraper.$ = cheerio.load(scraper.body);
  done();
  return scraper;
};
/**
 * Runs a scraping function against the loaded document and reports the
 * outcome through a Node-style callback.
 *
 * The scraping function is called synchronously, with `this` set to
 * null, receiving the jQuery-like function held in `this.$` followed by
 * the entries of `args`. Whatever it throws is caught and passed to the
 * callback instead of propagating. The callback itself is invoked
 * outside the try block, so anything it throws reaches the caller.
 *
 * @param {!function(function(), ...?)} scraperFn Function to scrape
 *   the content. It receives the jQuery function to manipulate the
 *   DOM, and the args as parameters, if passed.
 * @param {!function(?, ?)} callbackFn Called as callbackFn(err, result).
 *   `err` is the value thrown by scraperFn, or null when it returned
 *   normally; `result` is scraperFn's return value, or null when it
 *   threw.
 * @param {!Array=} args Extra arguments to pass to the scraping
 *   function. The array is left unmodified.
 * @return {!AbstractScraper} This scraper.
 * @override
 * @public
 */
StaticScraper.prototype.scrape = function(scraperFn, callbackFn, args) {
  var result = null,
    err = null,
    // Build a fresh argument list instead of unshifting onto `args`:
    // mutating the caller's array would stack an extra `$` at its front
    // every time the same array is reused for another scrape call.
    callArgs = [this.$].concat(args || []);
  try {
    result = scraperFn.apply(null, callArgs);
  } catch (e) {
    err = e;
  }
  callbackFn(err, result);
  return this;
};
/**
 * Closes the scraper. This implementation performs no work and simply
 * returns the scraper so calls can be chained.
 *
 * @return {!AbstractScraper} This scraper.
 * @override
 * @inheritDoc
 */
StaticScraper.prototype.close = function() {
  var scraper = this;
  return scraper;
};
/**
 * Produces a new StaticScraper built with the no-argument constructor.
 * Nothing is copied from this instance by this method.
 *
 * @return {!StaticScraper} A newly constructed static scraper.
 * @override
 * @inheritDoc
 */
StaticScraper.prototype.clone = function() {
  var copy = new StaticScraper();
  return copy;
};
/**
 * Factory for a static scraper wrapped in a scraper promise. The work is
 * delegated to AbstractScraper.create, which receives the StaticScraper
 * constructor and the given URL.
 *
 * @param {!string=} url Forwarded unchanged to AbstractScraper.create.
 *   According to the original documentation, when provided an HTTP GET
 *   request is made to this URL (that behaviour lives in
 *   AbstractScraper, not in this file).
 * @return {!ScraperPromise} Scraper promise, with a static scraper.
 * @public
 * @static
 */
StaticScraper.create = function(url) {
  var ScraperType = StaticScraper;
  return AbstractScraper.create(ScraperType, url);
};
// The StaticScraper constructor is this module's sole export.
module.exports = StaticScraper;