easy_web_crawler
Version:
Web crawler wrapper around the puppeteer module that simplifies crawling of AJAX/JavaScript-enabled pages.
23 lines (19 loc) • 778 B
JavaScript
var Scraper = require('../index.js')
/**
 * Example crawl starting from the CNN travel destinations listing.
 *
 * Builds a `Scraper`, configures it through its setter-style methods (start
 * URL, URL filter, auto-crawl flag, progress file, page-load wait, page and
 * finish callbacks) and then awaits `start()`.
 *
 * @returns {Promise<void>} Settles together with the awaited `start()` call.
 */
async function main() {
  const crawler = new Scraper();

  crawler.startWithURLs("https://edition.cnn.com/travel/destinations");

  // URL filter: accepts only URLs whose text contains "destinations".
  const isDestinationUrl = (url) => url.includes("destinations");
  crawler.allowIfMatches(isDestinationUrl);

  crawler.enableAutoCrawler(true);
  crawler.saveProgressInFile("hello.db");
  crawler.waitBetweenPageLoad(0);

  // Per-page handler: reads the title and description text of the page and
  // hands the description to `page.saveResult`.
  const handlePage = async (page) => {
    const readText = (element) => element.innerText;
    // NOTE(review): the title is looked up but never saved; the lookup is kept
    // so the handler behaves exactly as before. Confirm whether the title
    // should be saved alongside the description.
    await page.$eval(".Destination__title", readText);
    const description = await page.$eval(".Destination__description", readText);
    page.saveResult(description);
  };
  crawler.callbackOnPageLoad(handlePage);

  // Finish handler: prints the received result as JSON indented by 4 spaces.
  const printResult = (result) => {
    console.log(JSON.stringify(result, null, 4));
  };
  crawler.callbackOnFinish(printResult);

  await crawler.start();
}
// Run the example. A rejected crawl is reported on stderr and signalled
// through the exit code instead of being left as an unhandled rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});