UNPKG

yamaha-moto-scraper

Version:

Yamaha.es moto scraping tool

116 lines (103 loc) 3.75 kB
const expect = require('chai').expect;
const cheerio = require('cheerio');
const Yamaha = require('../yamaha-scrape');
const QueryProcessor = require('scrape-query-processor');
const fs = require('fs');
const debug = require('debug')('yamaha');

const linksFileName = 'yamaha-mobility.html';
const mobilityUrl = 'https://www.yamaha-motor.eu/es/products/scooters/sport/index.aspx';
const workDir = 'tmp/yamaha/';
const siteRoot = 'https://www.yamaha-motor.eu';

/**
 * Integration tests for the Yamaha scraper.
 *
 * These tests query the live yamaha-motor.eu site and read files from
 * `tmp/yamaha/`. Several tests read files that are presumably written by an
 * earlier test or by the scraper itself (it is constructed with
 * `saveFiles: true`) - NOTE(review): confirm against ../yamaha-scrape.
 * The tests are therefore order-dependent.
 */
describe('getting yamaha moto pages', function () {
  const yamaha = new Yamaha(cheerio, QueryProcessor, { saveFiles: true });

  it('should scrape one page with links on "my mobility"', function (done) {
    this.timeout(10000);

    yamaha.querySite(mobilityUrl, linksFileName, function (err, page) {
      // Report the real request error instead of failing later on `page.length`.
      if (err) {
        done(err);
        return;
      }
      expect(page.length).to.be.not.equal(0);
      done();
    });
  });

  it('should parse urls from the mobility page', function () {
    const file = fs.readFileSync(`${workDir}${linksFileName}`, 'utf8');
    const parsed = yamaha.getLinksToMotos(file, '#segmentsholder li>a', 'yamaha-mobility.json');

    expect(parsed.length).to.be.above(1);
  });

  it('should be able to parse scraped page', function (done) {
    this.timeout(10000);

    const index = 0;
    const urls = JSON.parse(fs.readFileSync(`${workDir}yamaha-mobility.json`, 'utf8'));
    // Derive the local file name from the last URL segment: "x.aspx" -> "x.html".
    const pageFileName = urls[index].split('/').pop().replace(/\..+/, '.html');
    const queryURL = `${siteRoot}${urls[index]}?view=featurestechspecs`;
    const pagePath = `${workDir}${pageFileName}`;

    // Parses the page and checks that every expected specification field is non-empty.
    function parsing(page) {
      const parsed = yamaha.parseSpecification(page, queryURL);
      debug(parsed);
      expect(parsed.powerKW.length).to.be.not.equal(0);
      expect(parsed.volume.length).to.be.not.equal(0);
      expect(parsed.weight.length).to.be.not.equal(0);
      expect(parsed.fuelCapacity.length).to.be.not.equal(0);
      expect(parsed.title.length).to.be.not.equal(0);
      done();
    }

    // Error-first callback for the network path.
    function cb(err, page) {
      if (err) {
        done(err);
        return;
      }
      parsing(page);
    }

    // Reuse a previously saved copy of the page when there is one; otherwise fetch it.
    if (fs.existsSync(pagePath)) {
      parsing(fs.readFileSync(pagePath, 'utf8'));
    } else {
      yamaha.querySite(queryURL, pageFileName, cb);
    }
  });

  it('should scrape several page from the array of urls', function () {
    this.timeout(60000);

    const file = fs.readFileSync(`${workDir}${linksFileName}`, 'utf8');
    const parsed = yamaha.getLinksToMotos(file, '#segmentsholder li>a', 'yamaha-mobility.json');

    // Returning the promise lets Mocha report a rejection as a failure
    // instead of waiting for the timeout.
    return yamaha.parallelScraping(parsed, null, 'yamaha-mobility').then(function (data) {
      debug(data);
    });
  });

  it('should get urls for all the 3 types of scooters', function () {
    this.timeout(5000);

    // Returned so that a failed assertion inside `then` fails the test directly.
    return yamaha.getScooterSectionLinks().then(function (sectionsUrl) {
      expect(sectionsUrl.length).to.be.equal(3);
      expect(sectionsUrl[0]).to.have.property('name');
      expect(sectionsUrl[0]).to.have.property('link');
    });
  });

  it('should get all the pages from all the sections', function (done) {
    this.timeout(30000);

    const sections = JSON.parse(fs.readFileSync(`${workDir}all-yamaha-scooters.json`, 'utf8'));
    const allPageLinks = [];

    // One deferred request per section; they are run strictly one after another.
    const lines = sections.map(function (section) {
      return function () {
        yamaha.querySite(`${siteRoot}${section.link}`, `${section.name}.html`, callback);
      };
    });

    // Collects the links of the fetched page, then starts the next request
    // or, when the queue is drained, verifies the collected result.
    function callback(err, data) {
      debug(err);
      if (err) {
        done(err);
        return;
      }

      allPageLinks.push(yamaha.getLinksToMotos(data, '.segments li > a'));

      if (lines.length > 0) {
        lines.shift()();
        return;
      }

      debug(allPageLinks);
      expect(allPageLinks.length).to.equal(3);
      expect(allPageLinks[0].length).to.above(0);
      done();
    }

    // Fail with a clear message instead of a TypeError when there is nothing to fetch.
    expect(lines.length).to.be.above(0);
    lines.shift()();
  });
});