regard
Version:
Sugar-interface to access multiple data sources.
111 lines (94 loc) • 2.8 kB
JavaScript
var _ = require('lodash'),
Cheerio = require('cheerio'),
Q = require('q'),
Regard = require('.');
/** Main >> */
/*
 * Script state shared with the functions below:
 *  - regard:         the object returned by init()
 *  - requestCounter: incremented by the `as/page` $before hook on every request
 *  - time:           start timestamp used for the final duration report
 */
var regard = init(),
    requestCounter = 0;
var time = new Date();

/*
 * Pipeline: fetch every ad, hand them to `regard.root.ads('write', ...)`,
 * read them back through `regard.root.ads()`, then print a summary.
 *
 * Errors are handled once, at the end of the chain. Attaching a logger as the
 * rejection handler of each step would turn a failure into a fulfilled
 * `undefined`, letting later steps run on missing data and eventually crash on
 * `ads.length`.
 */
getAdsFromAutosphere()
    .then(function (res) {
        return regard.root.ads('write', res);
    })
    .then(function () {
        return regard.root.ads();
    })
    .then(function (ads) {
        console.log('-- END');
        console.log(ads.length +' ads was fetched with '
            + requestCounter+' requests and wrote into ads.json'
            +' (in '+ (Math.abs((new Date())-time)/1000) +' seconds)');
    })
    .catch(function (err) {
        // Report the failure and make the process exit with a non-zero status.
        console.error(err);
        process.exitCode = 1;
    });
/** Lambdas >> */
/**
 * Builds and returns the Regard instance used by this script.
 *
 * Registered keys:
 *  - `root`          -> `__dirname`
 *  - `root/ads`      -> 'ads.json'
 *  - `as`            -> 'http://www.autosphere.fr' with `$cache` and an Accept header
 *  - `as/page`       -> path 'recherche'; `$before` sets the `page`/`rows` query
 *                       and counts the request, `$after` turns the response body
 *                       into an array of ads
 *  - `as/page/total` -> `$after` extracts a number from the last pagination link
 *
 * NOTE(review): `as/page/total` only declares `$after`; it presumably inherits
 * `$path` and `$before` from `as/page` - confirm against Regard's documentation.
 *
 * @returns {Object} whatever the chained `Regard(...).$(...)` calls return.
 */
function init() {
    return Regard('fs', 'http')
        .$('root', __dirname)
        .$('root/ads', 'ads.json')
        .$('as', 'http://www.autosphere.fr', {$cache: true, headers: {'Accept': 'application/html'}})
        .$('as/page', {
            $path: 'recherche',
            $before: function (req, page, rows) {
                console.log(req.endpoint.key, 'GET ', req.path, 'page='+page);
                req.context.query = {
                    page: page,
                    rows: rows
                };
                ++requestCounter;
                return req;
            },
            $after: function (res) {
                return createAdsFromAutospherePage(res.body);
            }
        })
        .$('as/page/total', {
            $after: function (res) {
                // `$` must be declared locally: assigning an undeclared name
                // leaks a global and throws in strict mode.
                var $ = Cheerio.load(res.body),
                    lastPageClass = $('.pagination li:last-child a').attr('class');
                if (typeof lastPageClass !== 'string') {
                    throw new Error('Unable to read the page count: no class attribute on the last pagination link');
                }
                // The number follows the first five characters of the class
                // attribute (presumably a `page-N`-like value - TODO confirm).
                return _.parseInt(lastPageClass.substring(5));
            }
        });
}
/**
 * Fetches every result page and merges the ads into a single array.
 *
 * First asks `regard.as.page.total(1, rows)` for the number of pages, then
 * requests pages 1..total through getAdsFromAutospherePage and concatenates
 * the per-page arrays in page order.
 *
 * @param {number} [rows=200] rows per page, forwarded to every request.
 * @returns {Promise<Array>} Q promise fulfilled with all ads; rejected with the
 *     first error raised by the total lookup or by any page request.
 */
function getAdsFromAutosphere(rows) {
    rows = rows || 200;
    // Return the chain itself rather than mirroring it into a deferred:
    // rejections and exceptions thrown in handlers then propagate on their own.
    // Q(...) keeps the returned value a Q promise, as before.
    return Q(regard.as.page.total(1, rows))
        .then(function (total) {
            var pageRequests = _.range(1, total + 1).map(function (page) {
                return getAdsFromAutospherePage(page, rows);
            });
            return Q.all(pageRequests);
        })
        .then(function (pages) {
            // Flatten one level: [[ad, ad], [ad]] -> [ad, ad, ad].
            return Array.prototype.concat.apply([], pages);
        });
}
/**
 * Requests one result page through `regard.as.page`.
 *
 * @param {number} [page=1] page number; any falsy value falls back to 1.
 * @param {number} [rows=100] rows per page; any falsy value falls back to 100.
 * @returns {*} whatever `regard.as.page(page, rows)` returns.
 */
function getAdsFromAutospherePage(page, rows) {
    var pageNumber = page ? page : 1,
        pageSize = rows ? rows : 100;
    return regard.as.page(pageNumber, pageSize);
}
/**
 * Parses a page of HTML and builds one ad per `.fiche-synth` element.
 *
 * @param {string} html markup handed to `Cheerio.load`.
 * @returns {Array} results of createAdFromAutosphereCard, in document order;
 *     empty when no element matches.
 */
function createAdsFromAutospherePage(html) {
    var $ = Cheerio.load(html);
    return $('.fiche-synth').toArray().map(function (element) {
        return createAdFromAutosphereCard($, element);
    });
}
/**
 * Builds an ad from a single card element.
 *
 * Reads the text of seven descendants of `card` plus the `href` of the
 * `.lien-fiche a` link, and passes the eight values to AutoCard in that order.
 *
 * @param {Function} $ selector function called as `$(selector, card)`.
 * @param {*} card element used as the selection context.
 * @returns {Object} the value returned by AutoCard.
 */
function createAdFromAutosphereCard($, card) {
    var textSelectors = ['.marque', '.modele', '.serie', '.annee', '.km', '.energie', '.prix'],
        fields = textSelectors.map(function (selector) {
            return $(selector, card).text();
        });
    fields.push($('.lien-fiche a', card).attr('href'));
    return AutoCard.apply(null, fields);
}
/**
 * Builds an ad record from positional arguments.
 *
 * Arguments map, in order, to the keys `brand`, `model`, `serie`, `year`,
 * `km`, `energy`, `price` and `link`. A missing argument leaves its key set
 * to `undefined`; arguments beyond the eighth are ignored.
 *
 * @returns {Object} plain object holding the eight keys.
 */
function AutoCard() {
    var fieldNames = ['brand', 'model', 'serie', 'year', 'km', 'energy', 'price', 'link'],
        card = {},
        i;
    for (i = 0; i < fieldNames.length; i++) {
        card[fieldNames[i]] = arguments[i];
    }
    return card;
}