UNPKG

website-to-json

Version:

Converts all websites to JSON data

225 lines (217 loc) 8.04 kB
var S = require('string');
var _ = require('lodash'); // not referenced in this file; kept so the module's dependency surface is unchanged
var trim = require('trim');

/**
 * Parses a human-formatted integer such as "1,234,567" into a number.
 *
 * Plain parseInt stops at the first comma ("1,234" -> 1), so every comma is
 * stripped before parsing.
 *
 * @param {string} text - raw text taken from the page
 * @returns {number} the parsed integer, or NaN when the text has no leading digits
 */
function parseCount(text) {
  return parseInt(String(text).replace(/,/g, ''), 10);
}

/**
 * Site-specific scraping rules.
 *
 * Each rule is an object with:
 *   - title:   human-readable name of the rule
 *   - pattern: regular-expression source (as a string) describing the URLs the
 *              rule applies to; dots are escaped as '\\.' so they match a
 *              literal dot once the string is turned into a RegExp
 *   - parse:   function($) returning a plain object with the extracted data.
 *              `$` is used as a jQuery-style selector function over the
 *              fetched page (presumably a cheerio instance -- confirm against
 *              the caller).
 *
 * The order of the rules is preserved from the original file. Several patterns
 * overlap (e.g. the GitHub ones), so the consumer presumably relies on it --
 * verify before reordering.
 */
var array = [
  {
    title: 'imdb',
    pattern: '(imdb)\\.com/title/(.*)/',
    parse: function($) {
      return {
        name: trim($('h1').text())
      };
    }
  },
  {
    title: 'ceneo list',
    pattern: 'ceneo\\.pl/([a-zA-Z_]+)',
    parse: function($) {
      return {
        products: $('.cat-prod-row').map(function() {
          var nameCell = $(this).find('.cat-prod-row-name');
          var href = nameCell.find('a').attr('href');
          // Only the leading "/<digits>" part of the link is kept. A row
          // without such a link yields url: null instead of throwing and
          // aborting the whole listing.
          var idPath = href ? href.match(/^\/\d+/) : null;
          return {
            name: trim(nameCell.text()),
            url: idPath ? 'http://www.ceneo.pl' + idPath[0] : null
          };
        }).get()
      };
    }
  },
  {
    title: 'ceneo page',
    pattern: '(ceneo)\\.pl/([0-9]+)',
    parse: function($) {
      // The reviews link text is split on CRLF; the first piece is read as the
      // vote count and the second as the review count.
      var counter = trim($('.product-reviews-link').text()).split('\r\n');
      return {
        name: $('h1').text(),
        image: 'http:' + $('.product-pictures img').attr('src'),
        votes_count: parseInt(counter[0], 10),
        reviews_count: parseInt(counter[1], 10),
        breadcrumbs: trim($('.breadcrumbs dd').text()).split('\r\n').map(trim),
        score: parseFloat($('.product-score').text()),
        // The first comma is turned into a dot before parsing the price.
        price: parseFloat($('.price').eq(0).text().replace(',', '.')),
        //voting: $('.product-feature-voting').html(),
        reviews: $('.product-review').map(function() {
          var review = $(this);
          return {
            text: review.find('.product-review-body').text(),
            author: trim(review.find('.product-reviewer').text()),
            vote_yes: parseInt(review.find('.vote-yes').text(), 10),
            score: parseFloat(review.find('.review-score-count').text()),
            is_bought: !!review.find('.product-review-pz').text(),
            vote_no: parseInt(review.find('.vote-no').text(), 10)
          };
        }).get()
      };
    }
  },
  {
    title: 'google search',
    pattern: 'google\\.com',
    parse: function($) {
      return {
        links: $('.g').map(function() {
          // NOTE(review): 'aa' looks like a placeholder -- every result gets
          // the same constant url. Left unchanged; confirm the intended value.
          return {
            url: 'aa'
          };
        }).get()
      };
    }
  },
  {
    title: 'twitter profile',
    pattern: 'twitter\\.com/[a-zA-Z]+$',
    parse: function($) {
      return {
        name: S($('h1.ProfileHeaderCard-name').text()).trim().s,
        bio: $('.ProfileHeaderCard-bio.u-dir').text(),
        url: S($('.ProfileHeaderCard-urlText.u-dir').text()).trim().s
      };
    }
  },
  {
    title: 'github stargazers',
    //pattern: 'github\\.com/[a-zA-Z]+/[a-zA-Z-]+/stargazers\\?page\\=\\d',
    pattern: 'github\\.com/[a-zA-Z]+/[a-zA-Z-]+/stargazers',
    parse: function($) {
      return {
        stargazers: parseCount($('#repos .counter').eq(0).text()),
        users: $('.follow-list-item').map(function() {
          var item = $(this);
          return {
            // NOTE(review): "image" is filled with the heading text, not an
            // image URL. Left unchanged; confirm the intended value.
            image: item.find('h3').text(),
            url: 'https://www.github.com' + item.find('h3').find('a').attr('href'),
            info: item.find('.follow-list-info').text()
          };
        }).get()
      };
    }
  },
  {
    title: 'github repo',
    pattern: 'github\\.com/[a-zA-Z]+/[a-zA-Z-]+$',
    parse: function($) {
      return {
        watch: parseCount($('.social-count').eq(0).text()),
        stars: parseCount($('.social-count').eq(1).text()),
        forks: parseCount($('.social-count').eq(2).text()),
        // parseCount strips every thousands separator, not just the first.
        commits: parseCount($('.commits').text())
      };
    }
  },
  {
    title: 'github profile',
    pattern: 'github\\.com/[a-zA-Z]+$',
    parse: function($) {
      return {
        name: $('.vcard-fullname').text(),
        bio: $('.user-profile-bio').text(),
        // NOTE(review): username reads the same selector as name. Left
        // unchanged; confirm whether a dedicated username selector was meant.
        username: $('.vcard-fullname').text(),
        email: $('.octicon-mail').next().text(),
        joined: S($('.octicon-clock').next().next().text()).trim().s,
        location: S($('.octicon-location').parent().text()).trim().s,
        url: $('.octicon-link').next().text(),
        organization: $('.octicon-organization').parent().text(),
        followers_count: parseCount($('.vcard-stat-count').eq(0).text()),
        starred_count: parseCount($('.vcard-stat-count').eq(1).text()),
        following_count: parseCount($('.vcard-stat-count').eq(2).text()),
        contrib_last_year: $('.contrib-number').eq(0).text(),
        longest_streak: $('.contrib-number').eq(1).text()
      };
    }
  },
  {
    title: 'builtwith website',
    pattern: 'builtwith\\.com/.*$',
    parse: function($) {
      return {
        name: $('h1').text(),
        providers: $('.techItem').map(function() {
          var item = $(this);
          return {
            // Category comes from the nearest preceding ".titleBox" sibling.
            category: item.prevAll('.titleBox').first().find('.active').text(),
            name: item.find('a').eq(1).text()
          };
        }).get()
      };
    }
  },
  {
    title: 'filmweb movie',
    pattern: 'filmweb\\.pl/.*',
    parse: function($) {
      // Key/value pairs from the info table; a cell holding a list becomes an
      // array of the list items' texts.
      var info = $('.filmInfo tr').map(function() {
        var row = $(this);
        var title = row.find('th').text();
        var value = row.find('td').text();
        if (row.find('td ul').length) {
          value = row.find('td ul li').map(function() {
            return $(this).text();
          }).get();
        }
        return { key: title, value: value };
      }).get();

      var actors = $('.filmCast tr').map(function() {
        var cells = $(this).find('td');
        return {
          name: cells.eq(1).text(),
          image: cells.eq(0).find('img').attr('src'),
          movie_name: cells.eq(3).text(),
          movie_image: cells.eq(4).find('img').attr('src')
        };
      }).get();
      // The first cast row is dropped (presumably a header row -- confirm).
      actors = actors.slice(1);

      return {
        name: $('.filmTitle').text(),
        year: $('.halfSize').text(),
        original_name: $('h1').parent().next().text(),
        info: info,
        url: $('.filmTitle a').attr('href'),
        small_image: $('.posterLightbox img').attr('src'),
        big_image: $('.posterLightbox a').attr('href'),
        time: $('.filmTime').text(),
        rating: $(".ratingInfo span[property='v:average']").text(),
        votes: $(".afterPremiere span[property='v:votes']").text(),
        wants_to_see: $(".afterPremiere span[property='v:votes']").closest('.afterPremiere').next().text(),
        actors: actors
      };
    }
  },
  {
    title: 'npmjs.js package',
    pattern: 'npmjs\\.com/package/(.*)$',
    parse: function($) {
      // The release count is pulled out of text shaped like "of N releases";
      // it stays null when that text is absent.
      var releases = $('.last-publisher').next().text().match(/of ([0-9]+) releases/);
      if (releases) {
        releases = parseInt(releases[1], 10);
      }

      var collaborators = $('.collaborators li a').map(function() {
        return $(this).attr('title');
      }).get();

      var avatars = $('.collaborators li').map(function() {
        return $(this).find('a img').attr('src');
      }).get();

      var name = $('h1 a').eq(0).text();
      var downloads = $('.box').eq(1).find('li');

      return {
        name: name,
        short_description: $('.package-description').text(),
        //referer: result.uri,
        last_publisher: $('.last-publisher span').eq(0).text(),
        repo: $('.last-publisher').next().next().find('a').attr('href'),
        version: $('.last-publisher').next().find('strong').text(),
        tags: $('.list-of-links').eq(0).find('a').map(function() {
          return $(this).text();
        }).get(),
        dependencies: $('.list-of-links').eq(1).find('a').map(function() {
          return $(this).text();
        }).get(),
        releases: releases,
        collaborators: collaborators,
        //issues: $('#issues .enhanced a').text(),
        //pr: parseInt($('#pull_requests a').text(), 10),
        downloads_last_day: parseCount(downloads.eq(0).find('strong').text()),
        downloads_last_week: parseCount(downloads.eq(1).find('strong').text()),
        downloads_last_month: parseCount(downloads.eq(2).find('strong').text()),
        avatars: avatars,
        license: $('.last-publisher').next().next().next().find('a').text(),
        // Queried from the document: `this` inside parse is not a page
        // element, so the previous $(this).find(...) could never match.
        published_at: $('.last-publisher span').eq(1).attr('data-date')
        //published: $(this).find('p.author span').eq(0).text()
      };
    }
  }
];

module.exports = array;