UNPKG

faparser

Version:
410 lines (390 loc) 15.5 kB
/** * Created by aespinilla on 8/3/17. */ const jQuery = require('cheerio') const url = require('url') const BASE_URL = "https://www.filmaffinity.com" module.exports = { parseFilm: parseFilm, parseSearch: parseSearch, parseTrailers: parseTrailers, parseImages: parseImages, parseProReviews: parseProReviews } function parseFilm(data) { try { const content = jQuery(data.body) const film = {} film.url = data.url film.imageUrl = content.find('#movie-main-image-container').find('a').attr('href'); let imageUrlMed = content.find('#movie-main-image-container').find('img').attr('src'); if (imageUrlMed.includes('noimgfull')) { imageUrlMed = BASE_URL + imageUrlMed film.imageUrl = imageUrlMed } film.imageUrlMed = imageUrlMed film.rating = content.find('#movie-rat-avg').attr('content'); film.votes = content.find('#movie-count-rat').find('span').attr('content') film.title = content.find('#main-title').find('span').text().trim(); content.find('.movie-info dt').each(function (index, a) { const bind = jQuery(a).text().trim().toLowerCase(); switch (bind) { case "original title": case "título original": { const element = jQuery(a) const tO = element.next().text().trim() film.titleOrig = tO const akas = [] content.find('dd.akas li').each(function (index, akatitle) { const ak = jQuery(akatitle).text() akas.push(ak) }) if (akas.length != 0) { film.akas = akas film.titleOrig = tO.substring(0, tO.length - 3).trim() } break } case "year": case "año": { film.year = jQuery(a).next().text().trim() break } case "running time": case "duración": { film.running = jQuery(a).next().text().trim() break } case "country": case "país": { film.country = { imgCountry: BASE_URL + jQuery(a).next().find('img').attr('src'), country: jQuery(a).next().find('img').attr('alt'), } break } case "director": case "dirección": { film.directors = [] jQuery(a).next().find('a').each(function (index2, directors) { film.directors.push({ name: jQuery(directors).find('span').text().trim(), request: { query: jQuery(directors).find('span').text().trim(), type: 'DIRECTOR', lang: data.lang } }) }) break } case "screenwriter": case "guión": { film.screenwriter = [] jQuery(a).next().find('.nb span').each(function (index2, guion) { film.screenwriter.push(jQuery(guion).text().trim()) }) break } case "music": case "música": { film.music = []; jQuery(a).next().find('.nb span').each(function (index3, music) { film.music.push(jQuery(music).text().trim()) }) break } case "cinematography": case "fotografía": { film.cinematography = []; jQuery(a).next().find('.nb span').each(function (index3, foto) { film.cinematography.push(jQuery(foto).text().trim()) }) break } case "cast": case "reparto": { film.cast = []; jQuery(a).next().find('a').find('span').each(function (index3, actor) { film.cast.push({ name: jQuery(actor).text().trim(), request: { query: jQuery(actor).text().trim(), type: 'CAST', lang: data.lang } }) }) break } case "producer": case "productora": { film.production = jQuery(a).next().find('.nb span').text().trim() break } case "genre": case "género": { film.genre = [] jQuery(a).next().find('a').each(function (index3, genero) { const link = jQuery(genero).attr('href') const g = jQuery(genero).text().trim() let gnr = url.parse(link, true).query.genre if (!gnr) { gnr = url.parse(link, true).query.topic } film.genre.push({ name: g, request: { query: gnr, type: link.includes('moviegenre.php') ? 'GENRE' : (link.includes('movietopic.php') ? 'TOPIC' : 'TITLE'), lang: data.lang } }) }) break } case "synopsis / plot": case "sinopsis": { film.synopsis = jQuery(a).next().text().trim() break } default: { break } } }) film.streamingPlatforms = { subscription : [], buy : [], rent : [] } content.find( '#stream-wrapper > .body > .sub-title' ).each( function( _, streamingTitle ) { let providers const streamingType = jQuery( streamingTitle ).text().trim().toLowerCase() switch( streamingType ) { case 'suscripción' : providers = film.streamingPlatforms.subscription; break; case 'compra' : providers = film.streamingPlatforms.buy; break; case 'alquiler' : providers = film.streamingPlatforms.rent; break; default: console.warn( 'Streaming type not controlled: ', streamingType ); return; } jQuery( streamingTitle ).next().find( 'a' ).each( function( _, providerNode ) { const url = jQuery( providerNode ).attr( 'href' ) const provider = jQuery( providerNode ).find( 'img' ).attr( 'alt' ).trim() providers.push( { url, provider } ) } ) }); return film } catch (err) { console.error(err) //throw ({code: 4, msg: 'Can not parse film'}) } return {} } function parseSearch(data) { const pathname = url.parse(data.response.request.uri.href).pathname; if (pathname.includes('film')) { const idTemp = pathname.substring(pathname.indexOf('film') + 'film'.length, pathname.indexOf('.')); data.response.lang = data.lang const film = parseFilm(data.response) return { more: false, count: 1, result: [{ id: idTemp, url: data.response.request.uri.href, thumbnail: film.imageUrlMed.replace("mmed", "msmall"), year: film.year, title: film.title, directors: film.directors, cast: film.cast, country: film.country, rating: data.lang == 'es' && film.rating ? film.rating.replace('.', ',') : film.rating, votes: film.votes }] } } if (data.type === 'TOPIC' || data.type === 'GENRE') { const sfilms = parseSpecialSearch({container: jQuery(data.body).find('.title-movies'), lang: data.lang}) return { more: false, count: sfilms.length, result: sfilms } } try { const outPut = {} const films = [] const content = jQuery(data.body) let year; content.find('.se-it').each(function (index, a) { const filmview = {}; const relUrl = jQuery(a).find('.mc-title').find('a').attr('href'); const idMatch = relUrl.match(/.*\/film(\d*)/); filmview.id = idMatch !== null ? idMatch[1] : ""; filmview.url = relUrl; filmview.country = { imgCountry: BASE_URL + jQuery(a).find('.mc-title').find('img').attr('src'), country: jQuery(a).find('.mc-title').find('img').attr('alt') } if (jQuery(a).hasClass('mt')) { year = jQuery(a).find('.ye-w').text(); } filmview.year = year; let thumbnail = jQuery(a).find('.mc-poster').find('img').attr('src'); if (thumbnail.includes('noimgfull')) { thumbnail = BASE_URL + thumbnail } filmview.thumbnail = thumbnail filmview.title = jQuery(a).find('.mc-title').find('a').attr('title').trim(); filmview.directors = []; jQuery(a).find('.mc-director').find('.credits').find('a').each(function (index, b) { filmview.directors.push({ name: jQuery(b).attr('title'), request: { query: jQuery(b).attr('title'), type: 'DIRECTOR', lang: data.lang } }) }) filmview.cast = []; jQuery(a).find('.mc-cast').find('.credits').find('a').each(function (index, d) { filmview.cast.push({ name: jQuery(d).attr('title'), request: { query: jQuery(d).attr('title'), type: 'CAST', lang: data.lang } }) }) filmview.rating = jQuery(a).find('.avgrat-box').text() filmview.votes = jQuery(a).find('.ratcount-box').text().trim() films.push(filmview); }) if (content.find('.see-all-button').length) { outPut.more = true } else { outPut.more = false } outPut.count = films.length outPut.result = films return outPut } catch (err) { console.error(err) } return [] } function parseTrailers(data) { try { const content = jQuery(data.body) const trailers = [] content.find('iframe').each(function (index, data) { const urlt = jQuery(data).attr('src') trailers.push(urlt); }) return trailers } catch (err) { console.error(err) //throw ({code: 4, msg: 'Can not parse film'}) } return [] } function parseImages(data) { const items = [] jQuery(data.body).find('#main-image-wrapper').find('a').each(function (index, item) { const href = jQuery(item).attr('href') if (href.indexOf('.jpg') != -1) { const item = { large: href } if (href.indexOf('large') != -1) { item.thumbnail = href.replace("large", "s200") } else { item.thumbnail = href } items.push(item) } }) return items } function parseProReviews(data) { try { const reviews = []; jQuery(data.body).find('.wrap>table>tbody>tr').each(function (index, element) { const elHtml = jQuery(element) const review = {}; const contryHtml = elHtml.find('.c>img') review.country = { imgCountry: BASE_URL + contryHtml.attr('src'), country: contryHtml.attr('title') }; review.gender = elHtml.find('.gender>span').text().trim(); const authorHtml = elHtml.find('.author'); review.author = authorHtml.find('div').text().trim(); review.source = authorHtml.find('strong').text().trim(); // This is for Filmaffinity review review.source = review.source === "" ? authorHtml.find('em').text().trim() : review.source; review.text = elHtml.find('.text').text().trim().replace(/"/g, ''); review.url = elHtml.find('.text>a').attr('href'); review.bias = elHtml.find('.fas.fa-circle').parent().find('span').text().trim(); reviews.push(review); }); return reviews; } catch (err) { console.error(err) } return []; } function parseSpecialSearch(data) { try { const films = [] data.container.find('.record').each(function (index, element) { const elHtml = jQuery(element) const f = {} f.id = elHtml.find('.movie-card').attr('data-movie-id') f.thumbnail = elHtml.find('.mc-poster img').attr('src') const titleHtml = elHtml.find('.mc-title') f.url = BASE_URL + elHtml.find('a').attr('href') f.title = titleHtml.find('a').attr('title').trim() f.country = { imgCountry: BASE_URL + titleHtml.find('img').attr('src'), country: titleHtml.find('img').attr('alt') } f.year = titleHtml.text().substring(f.title.length + 2).replace(")", "").trim() f.directors = [] elHtml.find('.mc-director .credits a').each(function (index, elDir) { const item = jQuery(elDir) f.directors.push({ name: item.attr('title'), request: { query: item.attr('title'), type: 'DIRECTOR', lang: data.lang } }) }) f.cast = [] elHtml.find('.mc-cast .credits a').each(function (index, elCast) { const item = jQuery(elCast) f.cast.push({ name: item.attr('title'), request: { query: item.attr('title'), type: 'CAST', lang: data.lang } }) }) f.rating = elHtml.find('.avg-w').text().trim() f.votes = elHtml.find('.votes2').text().trim() //console.log(f) films.push(f) }) return films } catch (err) { console.error(err) } return [] }