bedetheque-scraper
Version:
Node.js script to scrape the entire database of bdgest.com / bedetheque.com (approx. 260,000 albums)
86 lines (85 loc) • 3.34 kB
JavaScript
"use strict";
// URL templates showing how the image file names (imageCover, imageExtract,
// imageReverse) map to full-size and thumbnail URLs on bedetheque.com:
// imageCoverLarge: https://www.bedetheque.com/media/Couvertures/${imageCover}
// imageCoverSmall: https://www.bedetheque.com/cache/thb_couv/${imageCover}
// imageExtractLarge: https://www.bedetheque.com/media/Planches/${imageExtract}
// imageExtractSmall: https://www.bedetheque.com/cache/thb_planches/${imageExtract}
// imageReverseLarge: https://www.bedetheque.com/media/Versos/${imageReverse}
// imageReverseSmall: https://www.bedetheque.com/cache/thb_versos/${imageReverse}
// Flag the CommonJS exports object with `__esModule: true`
// (presumably for ES-module interop of transpiled output — confirm against the build setup).
Object.defineProperty(exports, "__esModule", { value: true });
/**
 * One album extracted from an album fragment of a serie page.
 *
 * Populated fields: serieId, serieTitle, albumId, albumTitle, imageCover,
 * imageExtract, imageReverse, voteAverage, voteCount and, when the matching
 * detail line is present, scenario, drawing, colors, date, editor, nbrOfPages.
 */
var Album = /** @class */ (function () {
    /**
     * @param page jQuery-style selection wrapping the album markup
     *             (presumably a cheerio object — confirm against the caller).
     * @param $ jQuery-style factory used to wrap the raw detail nodes.
     * @param serieId identifier of the serie, stored as-is.
     * @param serieTitle title of the serie, stored as-is.
     */
    function Album(page, $, serieId, serieTitle) {
        this.serieId = serieId;
        this.serieTitle = serieTitle;
        // The album id is read from the `name` attribute of the first child node.
        this.albumId = parseInt(page.children().first().attr('name'), 10);
        this.albumTitle = page.find('.album-main .titre').attr('title');
        this.imageCover = this.findImage(page, 'browse-couvertures', 'Couvertures');
        this.imageExtract = this.findImage(page, 'browse-planches', 'Planches');
        this.imageReverse = this.findImage(page, 'browse-versos', 'Versos');
        this.voteAverage = this.findVoteAverage(page, $);
        this.voteCount = this.findVoteCount(page, $);
        this.addDetails(page, $);
    }
    /**
     * Reads the displayed rating and multiplies it by 20
     * (presumably converting a 0-5 note to a 0-100 scale — confirm).
     *
     * @returns the scaled rating, or 0 when the rating is missing or not numeric.
     */
    Album.prototype.findVoteAverage = function (page, $) {
        var rating = parseFloat(page.find('.ratingblock strong').text());
        // parseFloat yields NaN for empty or non-numeric text; report 0 instead of leaking NaN.
        return isNaN(rating) ? 0 : 20 * rating;
    };
    /**
     * Extracts the number of votes from the "(N vote...)" fragment of the rating paragraph.
     *
     * @returns the vote count, or 0 when the paragraph is empty or holds no such fragment.
     */
    Album.prototype.findVoteCount = function (page, $) {
        var ratingText = page.find('.ratingblock p').text();
        if (!ratingText) {
            return 0;
        }
        // String.prototype.match returns null when nothing matches, so it must not be indexed blindly.
        var match = ratingText.match(/\(([0-9]+) vote/);
        return match ? parseInt(match[1], 10) : 0;
    };
    /**
     * Looks up the link with the given class under `.sous-couv` and strips the
     * media URL prefix from its href.
     *
     * @param className class of the link to look up (e.g. 'browse-couvertures').
     * @param path media sub-directory that is part of the prefix to strip.
     * @returns the href without its "https://www.bedetheque.com/media/<path>/" prefix,
     *          or null when the link has no href.
     */
    Album.prototype.findImage = function (page, className, path) {
        var image = page.find(".sous-couv ." + className).attr('href');
        return image
            ? image.replace("https://www.bedetheque.com/media/" + path + "/", '')
            : null;
    };
    /**
     * Feeds every `.infos > li` entry of the album to addDetail.
     */
    Album.prototype.addDetails = function (page, $) {
        var _this = this;
        page.find('.infos > li').each(function (index, info) {
            _this.addDetail($(info));
        });
    };
    /**
     * Parses one "label : value" detail line and stores the value on the
     * matching property. Lines without a value or with an unknown label are ignored.
     *
     * @param pageInfo jQuery-style selection wrapping a single detail `<li>`.
     */
    Album.prototype.addDetail = function (pageInfo) {
        var key = pageInfo.find('label').text().trim()
            .toLowerCase()
            .replace(' :', '');
        var text = pageInfo.text();
        var separatorIndex = text.indexOf(':');
        if (separatorIndex === -1) {
            return;
        }
        // Keep everything after the FIRST colon so values that contain a colon are not truncated.
        var value = text.slice(separatorIndex + 1).trim();
        if (!value) {
            return;
        }
        switch (key) {
            case 'scénario':
                this.scenario = value;
                break;
            case 'dessin':
                this.drawing = value;
                break;
            case 'couleurs':
                this.colors = value;
                break;
            case 'dépot légal':
                // Only the first 7 characters are kept (e.g. "01/1961"); anything after is dropped.
                this.date = value.slice(0, 7);
                break;
            case 'editeur':
                this.editor = value;
                break;
            case 'planches':
                this.nbrOfPages = parseInt(value, 10);
                break;
            default:
                break;
        }
    };
    return Album;
}());
// Expose the Album constructor as a named export of this module.
exports.Album = Album;