/*
 * tmal
 * Version:
 * tuna's myanimelist parser
 * 274 lines (262 loc) • 11.3 kB
 * JavaScript
 */
const cheerio = require('cheerio');
const _ = require('lodash');
const Promise =require('bluebird');
const baseUrl = require('../utils').baseUrl;
const fetch = require('../utils').fetch;
const parseSidebar = require('../utils').parseSidebar;
const parseStartDate = require('../utils').parseStartDate;
const parseEndDate = require('../utils').parseEndDate;
const getSeason = require('../utils').getSeason;
const asyncq = require('async-q');
const S = require('string');
const moment = require('moment');
const async = require('asyncawait/async');
const await = require('asyncawait/await');
const parseEpisodeDate = require('../utils').getEpisodeAirDate;
const character = require('./characters');
const person = require('./person');
/**
 * Constructor for the anime scraper object.
 * Every instance starts with its `start` and `id` fields set to null.
 */
function Anime() {
    Object.assign(this, { start: null, id: null });
}
/**
 * Collect the anime ids linked from a listing page.
 *
 * @param {string} body - Markup handed to `cheerio.load`.
 * @returns {Promise<Array>} Resolves with, for each matched link, the fifth
 *   '/'-separated segment of its `href`.
 */
Anime.prototype.getAnimeIds = (body) => {
    const $ = cheerio.load(body);
    const links = $('.hoverinfo_trigger.fl-l.ml12.mr8');
    const idFromLink = (link) => {
        const segments = _.split($(link).attr('href'), '/');
        return segments[4];
    };
    const ids = _.map(links, idFromLink);
    return Promise.resolve(ids);
};
/**
 * Fetch an anime's main page and scrape its details.
 *
 * @param {number|string} id - Anime id; appended to `baseUrl + 'anime/'` to build the page URL.
 * @returns {Promise<Object>} Resolves with the scraped record: name, score, ranked, popularity,
 *   members, cover, description, malId, aired, episodes, rating, duration, source, genres, type,
 *   alternatives, status, start_date, end_date, season and relations.
 */
Anime.prototype.getAnimeById = (id) => {
    const url = baseUrl + 'anime/' + id;
    return fetch(url).then(body => {
        const $ = cheerio.load(body);

        // Counters are parsed from their digits only, so a leading '#' or thousands
        // separators (e.g. "#12", "1,234,567") can neither drop a digit nor cut the number short.
        const parseCount = (selector) => parseInt($(selector).text().replace(/[^\d]/g, ''), 10);

        // Sidebar rows for one title kind, with the "<Label>:" prefix removed; null when there are none.
        const titles = (label) => {
            const rows = parseSidebar($, label, true);
            return _.isEmpty(rows) ? null : _.map(rows, (row) => row.replace(label + ':', '').trim());
        };

        const airedRaw = parseSidebar($, 'Aired', false);
        const aired = airedRaw ? airedRaw.replace(/[\n\t]/g, '') : null;
        const source = parseSidebar($, 'Source', false);
        const startDate = parseStartDate(aired);
        const endDate = parseEndDate(aired);
        const hasStartDate = !_.isNaN(startDate);

        return {
            name : $('h1').text().trim(),
            score : parseFloat($('.fl-l.score').text().trim()),
            ranked : parseCount('.numbers.ranked strong'),
            popularity : parseCount('.numbers.popularity strong'),
            members : parseCount('.numbers.members strong'),
            cover : $('.ac').eq(0).attr('src'),
            description : $('span[itemprop=description]').text().trim().replace(/\n/g, '').replace('[Written by MAL Rewrite]', ''),
            malId : id,
            aired : aired,
            episodes : parseInt(parseSidebar($, 'Episodes', false), 10),
            rating : parseSidebar($, 'Rating', false),
            duration : parseSidebar($, 'Duration', false),
            // Only 'Manga' and 'Anime' are reported; any other source becomes null.
            // (The previous `!== 'Manga' || !== 'Anime'` test was always true, so this was always null.)
            source : (source === 'Manga' || source === 'Anime') ? source : null,
            genres : $('span:contains("Genres:")').siblings('a').map((i, el) => $(el).text()).get(),
            type : $('span:contains("Type:")').next('a').text().trim(),
            alternatives : {
                japanese : titles('Japanese'),
                english : titles('English'),
                // The misspelled key is kept as-is because callers may already read it.
                synoynms : titles('Synonyms')
            },
            status : parseSidebar($, 'Status', false),
            start_date : hasStartDate ? startDate : null,
            end_date : _.isNaN(endDate) ? null : endDate,
            season : hasStartDate ? getSeason(startDate) : null,
            relations : $("h2:contains('Related Anime')").next('table').find('tr').map((i, el) => ({
                // The first cell ends with a trailing character (dropped by slice); the second is a comma-separated list.
                type: $(el).find('td').eq(0).text().trim().slice(0, -1),
                name: _.map(_.split($(el).find('td').eq(1).text().trim(), ','), (t) => t.trim())
            })).get()
            /*
             * TODO: get stuff
             * */
        };
    });
};
/**
 * Fetch an anime's main page and scrape only its basic fields.
 *
 * @param {number|string} id - Anime id; appended to `baseUrl + 'anime/'` to build the page URL.
 * @returns {Promise<{name: string, malId: *, type: string, status: *}>} Resolves with the
 *   page title, the id that was passed in, the type link text and the sidebar 'Status' value.
 */
Anime.prototype.getAnimeByIdBasic = (id) => {
    const url = baseUrl + 'anime/' + id;
    return fetch(url).then(body => {
        const $ = cheerio.load(body);
        // The 'Aired' sidebar value used to be parsed here and then discarded; it is not part
        // of the basic record, so the lookup is gone.
        return {
            name : $('h1').text().trim(),
            malId : id,
            type : $('span:contains("Type:")').next('a').text().trim(),
            status : parseSidebar($, 'Status', false),
        };
    });
};
/*
 * TODO: get new/latest anime.
 * */
/*
 * TODO: getAnimeStaff
 * */
/*
 * TODO: get anime reviews, opening themes, ending themes, recommendations, ...
 * */
Anime.prototype.getAnimeByIdFull = function(id) {
const _this = this;
const getAll = async((id) => {
const animeData = await(_this.getAnimeById(id));
const details = await({
characters : _this.getAnimeCharactersFull(id),
episodes : _this.getAnimeEpisodes(id) ,
pictures : _this.getAnimePictures(id)
});
return _.merge(animeData,details);
});
return getAll(id);
};
/**
 * Fetch the pictures page of an anime and list its image sources.
 *
 * @param {number|string} id - Anime id used to build the `/_/pics` URL.
 * @returns {Promise<Array>} Resolves with the `src` of the `img` inside every link
 *   whose href contains "images/anime".
 */
Anime.prototype.getAnimePictures = (id) => {
    const picturesUrl = baseUrl + 'anime/' + id + '/_/pics';
    return fetch(picturesUrl).then((html) => {
        const $ = cheerio.load(html);
        const toImageSrc = (anchor) => $(anchor).find('img').attr('src');
        return _.map($('a[href*="images/anime"]'), toImageSrc);
    });
};
/**
 * Fetch the episode list page of an anime, then load every episode from 1 up to the
 * number shown in the page's episode counter.
 *
 * @param {number|string} id - Anime id used to build the `/_/episode` URL.
 * @returns {Promise<Array|null>} Resolves with the results of `getAnimeEpisode` for each
 *   episode number, or null when the counter cannot be parsed or the request fails with
 *   `type === 404`.
 * @throws {Error} Rejects with the original error for any other failure (non-Error
 *   rejection values are wrapped in an Error).
 */
Anime.prototype.getAnimeEpisodes = function(id) {
    const url = baseUrl + 'anime/' + id + '/_/episode';
    const _this = this;
    return fetch(url).then(body => {
        const $ = cheerio.load(body);
        // The counter text is split on '/', and only the part before the slash is parsed.
        const counterHead = _.split($('.di-ib.pl4.fw-n.fs10').text(), '/')[0];
        const totalEpisodes = parseInt(S(counterHead).strip(' ', '_', '-', '"', '(', ')').s, 10);
        // parseInt yields NaN for an unparsable counter. The previous `_.isNumber` guard let
        // NaN through (NaN is classified as a number), so the null branch never ran.
        if (_.isNaN(totalEpisodes)) {
            return null;
        }
        const episodeNumbers = _.range(1, totalEpisodes + 1);
        // At most two episode pages are requested at a time.
        return asyncq.mapLimit(episodeNumbers, 2, (episode) => _this.getAnimeEpisode(id, episode));
    }).catch(err => {
        if (err.type === 404) {
            return null;
        }
        // Rethrow the original error so its stack and `type` survive.
        throw err instanceof Error ? err : new Error(err);
    });
};
/**
 * Fetch the page of a single episode and scrape its number, titles and air date.
 *
 * @param {number|string} id - Anime id used to build the URL.
 * @param {number|string} episode_number - Episode number appended after `/_/episode/`.
 * @returns {Promise<Object|null>} Resolves with `{ episode_number, episode_name,
 *   alternative_name, air_date }`, or null when the request fails with `type === 404`.
 *   `air_date` is null when no "<Mon> <day>, <year>(<ZONE>)" date is found on the page.
 * @throws {Error} Rejects with the original error for any other failure (non-Error
 *   rejection values are wrapped in an Error).
 */
Anime.prototype.getAnimeEpisode = (id, episode_number) => {
    const url = baseUrl + 'anime/' + id + '/_/episode/' + episode_number;
    return fetch(url).then(body => {
        const $ = cheerio.load(body);
        const h2 = $('table h2.fs18');
        const span = $('table h2.fs18 span');
        const airedMatch = $('table div.fn-grey2').text().match(/((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) [0-9]+, [0-9]{4})\(([A-Z]+)\)/);
        let airDate = null;
        if (airedMatch !== null) {
            // One day is added to the parsed date before converting it to a unix timestamp.
            airDate = moment(airedMatch[1], 'MMMM D, YYYY').add(1, 'days').unix();
        }
        return {
            // Everything except digits, '.' and '-' is stripped before parsing.
            episode_number : parseInt(span.text().replace(/[^\d\.\-]/g, ''), 10),
            // The title is read from the third child node of the heading.
            episode_name : h2.get(0).childNodes[2].nodeValue,
            alternative_name : h2.next('p').text().trim(),
            air_date : airDate
        };
    }).catch(err => {
        if (err.type === 404) {
            return null;
        }
        // Rethrow the original error so its stack and `type` survive.
        throw err instanceof Error ? err : new Error(err);
    });
};
// 2015-07-07T15:00:00.000Z
/**
 * Load the full record of every anime on one page of the top-anime ranking.
 *
 * @param {number} [page=0] - Ranking page; multiplied by 50 to form the `limit` query value.
 * @returns {Promise<Array>} Resolves with the `getAnimeById` result of each listed anime,
 *   requested one at a time.
 */
Anime.prototype.getTopAnimes = function(page = 0) {
    const offset = page * 50;
    const rankingUrl = `${baseUrl}topanime.php?limit=${offset}`;
    // Arrow functions keep `this` bound to the receiver of getTopAnimes.
    const loadDetails = (animeId) => this.getAnimeById(animeId);
    return fetch(rankingUrl)
        .then((html) => this.getAnimeIds(html))
        .then((ids) => asyncq.mapLimit(ids, 1, loadDetails));
};
/**
 * Load the basic record of every anime on one page of the top-anime ranking.
 *
 * @param {number} [page=0] - Ranking page; multiplied by 50 to form the `limit` query value.
 * @returns {Promise<Array>} Resolves with the `getAnimeByIdBasic` result of each listed
 *   anime, requested at most two at a time.
 */
Anime.prototype.getTopAnimesBasic = function(page = 0) {
    const offset = page * 50;
    const rankingUrl = `${baseUrl}topanime.php?limit=${offset}`;
    // Arrow functions keep `this` bound to the receiver of getTopAnimesBasic.
    const loadBasics = (animeId) => this.getAnimeByIdBasic(animeId);
    return fetch(rankingUrl)
        .then((html) => this.getAnimeIds(html))
        .then((ids) => asyncq.mapLimit(ids, 2, loadBasics));
};
/**
 * Fetch the characters page of an anime and list its characters with their voice actors.
 *
 * @param {number|string} id - Anime id used to build the `/characters` URL.
 * @returns {Promise<Array<Object>>} Resolves with one `{ name, malCharId, role, href,
 *   voiceActors }` entry per character link that does not carry the `fw-n` class. Each
 *   voice actor is `{ name, language, href, malPersonId }`.
 */
Anime.prototype.getAnimeCharacters = (id) => {
    const url = baseUrl + 'anime/' + id + '/' + id + '/characters';
    return fetch(url).then(body => {
        const $ = cheerio.load(body);
        const links = $('td > a[href*="/character/"]');
        // Links with the `fw-n` class are skipped. They are filtered out up front so the
        // result holds only character objects instead of an `undefined` hole per skipped link.
        const characterLinks = _.filter(links, (t) => !$(t).hasClass('fw-n'));
        return _.map(characterLinks, (t) => {
            // Voice-actor links sit in the cell following the character's own cell.
            const vlist = $(t).parent('td').next('td').find('td > a[href*="/people/"]');
            const href = $(t).attr('href');
            return {
                name : $(t).text().trim(),
                // The id is the fifth '/'-separated segment of the link target.
                malCharId : _.split(href, '/')[4],
                role : $(t).next().text().trim().replace(/\n/g, ''),
                href : href,
                voiceActors : _.map(vlist, (k) => {
                    const personHref = $(k).attr('href');
                    return {
                        name : $(k).text().trim(),
                        language : $(k).parent('td').find('small').text().trim(),
                        href : personHref,
                        malPersonId : _.split(personHref, '/')[4],
                    };
                })
            };
        });
    });
};
Anime.prototype.getAnimeCharactersFull = (id) => {
const url = baseUrl+'anime/'+id+'/'+id+'/characters';
return fetch(url).then(body => {
const $ = cheerio.load(body);
const list = $('td > a[href*="/character/"]');
const charDetails = async((list)=>{
return await(_.map(list,(t)=>{
if(!$(t).hasClass('fw-n'))
{
const vlist = $(t).parent('td').next('td').find('td > a[href*="/people/"]');
const charId = _.split($(t).attr('href'),'/')[4];
const href = $(t).attr('href');
return await({
name : $(t).text().trim(),
malCharId : _.split($(t).attr('href'),'/')[4],
role : $(t).next().text().trim().replace(new RegExp( "\n", "g" ),""), //$(t).next('td').find('a').parent().find('div').find('small').text()
href : $(t).attr('href'),
details : await(character.getCharacter(charId,href)),
voiceActors : await( _.map(vlist,(k) => {
return {
name : $(k).text().trim(),
language : $(k).parent('td').find('small').text().trim(), //$(t).next('td').find('a').parent().find('div').find('small').text()
href : $(k).attr('href'),
malPersonId : _.split( $(k).attr('href'),'/')[4],
details : await(person.getPerson(_.split( $(k).attr('href'),'/')[4], $(k).attr('href')))
}
}))
});
}
}));
});
return charDetails(list);
});
};
// Export a ready-made Anime instance rather than the constructor itself.
module.exports = new Anime();