supercrawler
Version:
A web crawler. Supercrawler automatically crawls websites. Define custom handlers to parse content. Obeys robots.txt, rate limits and concurrency limits.
20 lines (18 loc) • 553 B
JavaScript
var Crawler = require("./Crawler"),
Url = require("./Url"),
DbUrlList = require("./DbUrlList"),
RedisUrlList = require("./RedisUrlList"),
htmlLinkParser = require("./handlers/htmlLinkParser"),
robotsParser = require("./handlers/robotsParser"),
sitemapsParser = require("./handlers/sitemapsParser");
module.exports = {
Crawler: Crawler,
Url: Url,
DbUrlList: DbUrlList,
RedisUrlList: RedisUrlList,
handlers: {
htmlLinkParser: htmlLinkParser,
robotsParser: robotsParser,
sitemapsParser: sitemapsParser
}
};