/*
 * yamaha-moto-scraper
 * Version:
 * Yamaha.es moto scraping tool
 * 116 lines (103 loc) • 3.75 kB
 * JavaScript
 */
// Test dependencies: chai's `expect` assertions, plus cheerio and the query
// processor that are handed to the scraper constructor, and the scraper module itself.
const expect = require('chai').expect;
const cheerio = require('cheerio');
const Yamaha = require('../yamaha-scrape');
const QueryProcessor = require('scrape-query-processor')
// File name of the scooter listing page; the tests read it from `workDir`.
const linksFileName = 'yamaha-mobility.html';
// URL of the listing page requested by the first test.
const mobilityUrl = 'https://www.yamaha-motor.eu/es/products/scooters/sport/index.aspx';
const fs = require('fs');
// Debug logger created for the 'yamaha' namespace.
const debug = require('debug')('yamaha')
// Path prefix for the HTML/JSON files the tests read.
// NOTE(review): presumably the scraper writes its files here as well (`saveFiles: true`) — confirm.
const workDir = 'tmp/yamaha/';
describe('getting yamaha moto pages', function () {
let yamaha = new Yamaha(cheerio, QueryProcessor, {saveFiles: true});
/**
 * Requests the scooter listing page through `yamaha.querySite` and checks
 * that the page handed to the callback is not empty.
 * NOTE(review): later tests read `linksFileName` from `workDir`, so this
 * request presumably also saves the page there — confirm in the scraper.
 */
it('should scrape one page with links on "my mobility"', function (done) {
    // The request goes to a remote URL, so the test timeout is raised to 10 s.
    this.timeout(10000);
    yamaha.querySite(mobilityUrl, linksFileName, function onPage(err, page) {
        // Report a failed request as the test failure; without this guard
        // `page` is undefined and `page.length` throws inside the callback.
        if (err) {
            return done(err);
        }
        try {
            expect(page.length).to.be.not.equal(0);
        } catch (assertionError) {
            // Hand the assertion error to Mocha instead of throwing from an async callback.
            return done(assertionError);
        }
        done();
    });
});
// Reads the listing page from `workDir`, extracts the moto links with
// `yamaha.getLinksToMotos` and expects more than one link in the result.
// (A stricter check that the first link contains 'urban-mobility' is disabled.)
it('should parse urls from the mobility page', function (done) {
    const listingHtml = fs.readFileSync(`${workDir}${linksFileName}`, 'utf8');
    const motoLinks = yamaha.getLinksToMotos(
        listingHtml,
        '#segmentsholder li>a',
        'yamaha-mobility.json'
    );
    expect(motoLinks.length).to.be.above(1);
    done();
});
/**
 * Parses the specification page of the first URL listed in
 * `yamaha-mobility.json` and checks that the main fields are non-empty.
 * The page is read from `workDir` when a file with the derived name already
 * exists there; otherwise it is requested through `yamaha.querySite`.
 */
it('should be able to parse scraped page', function (done) {
    this.timeout(10000);
    const index = 0;
    const urls = JSON.parse(fs.readFileSync(`${workDir}yamaha-mobility.json`, 'utf8'));
    // Last path segment, with everything from its first dot onwards replaced by ".html".
    const pageFileName = urls[index].split('/').pop().replace(/\..+/, '.html');
    const queryURL = `https://www.yamaha-motor.eu${urls[index]}?view=featurestechspecs`;
    const pagePath = `${workDir}${pageFileName}`;

    // Runs the parser on the page and finishes the test; any thrown error
    // (parser or assertion) is passed to `done` so it is reported properly
    // even when this runs inside the async request callback.
    function parsing(page) {
        try {
            const parsed = yamaha.parseSpecification(page, queryURL);
            debug(parsed);
            expect(parsed.powerKW.length).to.be.not.equal(0);
            expect(parsed.volume.length).to.be.not.equal(0);
            expect(parsed.weight.length).to.be.not.equal(0);
            expect(parsed.fuelCapacity.length).to.be.not.equal(0);
            expect(parsed.title.length).to.be.not.equal(0);
        } catch (error) {
            return done(error);
        }
        done();
    }

    if (fs.existsSync(pagePath)) {
        parsing(fs.readFileSync(pagePath, 'utf8'));
        return;
    }

    yamaha.querySite(queryURL, pageFileName, function onPage(err, page) {
        // A failed request must fail the test instead of parsing an undefined page.
        if (err) {
            return done(err);
        }
        parsing(page);
    });
});
/**
 * Extracts the moto links from the saved listing page and passes them to
 * `yamaha.parallelScraping`, logging whatever it resolves with.
 * The promise is returned to Mocha so that a rejection fails the test with
 * the real error instead of running into the 60 s timeout.
 */
it('should scrape several page from the array of urls', function () {
    this.timeout(60000);
    const file = fs.readFileSync(`${workDir}${linksFileName}`, 'utf8');
    const parsed = yamaha.getLinksToMotos(file, '#segmentsholder li>a', 'yamaha-mobility.json');
    return yamaha.parallelScraping(parsed, null, 'yamaha-mobility').then(function (data) {
        debug(data);
    });
});
/**
 * Checks that `yamaha.getScooterSectionLinks` resolves with three entries
 * and that the first one has `name` and `link` properties.
 * The promise is returned to Mocha: a failed assertion inside `then` (or a
 * rejected request) then fails the test with its own message rather than
 * being swallowed and reported as a timeout.
 */
it('should get urls for all the 3 types of scooters', function () {
    this.timeout(5000);
    return yamaha.getScooterSectionLinks().then(function (sectionsUrl) {
        expect(sectionsUrl.length).to.be.equal(3);
        expect(sectionsUrl[0]).to.have.property('name');
        expect(sectionsUrl[0]).to.have.property('link');
    });
});
/**
 * Requests the page of every section listed in `all-yamaha-scooters.json`,
 * one request after another, collects the moto links found on each page and
 * expects three link lists with a non-empty first one.
 */
it('should get all the pages from all the sections', function (done) {
    this.timeout(30000);
    const sections = JSON.parse(fs.readFileSync(`${workDir}all-yamaha-scooters.json`, 'utf8'));
    const allPageLinks = [];

    // One deferred request per section; they are started strictly in sequence.
    const pending = sections.map(function (section) {
        return function () {
            yamaha.querySite(
                `https://www.yamaha-motor.eu${section.link}`,
                `${section.name}.html`,
                onPage
            );
        };
    });

    // Final assertions, run once every queued request has completed.
    function finish() {
        try {
            debug(allPageLinks);
            expect(allPageLinks.length).to.equal(3);
            expect(allPageLinks[0].length).to.above(0);
        } catch (error) {
            return done(error);
        }
        done();
    }

    // Starts the next queued request. An empty queue (including an empty
    // `sections` file) goes straight to the assertions instead of crashing
    // on `undefined()`.
    function runNext() {
        const next = pending.shift();
        if (!next) {
            return finish();
        }
        next();
    }

    // Callback for each request: stop on the first error, otherwise collect
    // the links from the page and move on.
    function onPage(err, data) {
        if (err) {
            debug(err);
            return done(err);
        }
        try {
            allPageLinks.push(yamaha.getLinksToMotos(data, '.segments li > a'));
        } catch (error) {
            return done(error);
        }
        runNext();
    }

    runNext();
});
});