// website-to-json
// Version:
// Converts all websites to JSON data
// 225 lines (217 loc) • 8.04 kB
// JavaScript
var S = require('string')
var _ = require('lodash')
var trim = require('trim')
/**
 * Parses a base-10 integer from the start of `text`.
 * Returns NaN when `text` does not begin with a number.
 */
function toInt(text) {
  return parseInt(text, 10);
}

/**
 * Parses an integer count that may use commas as thousands separators
 * (e.g. "1,234" -> 1234). Returns NaN when no number is present.
 */
function toCount(text) {
  return parseInt(String(text).replace(/,/g, ''), 10);
}

/**
 * Collects the text of every element of `selection` into a plain array.
 */
function textsOf($, selection) {
  return selection.map(function() {
    return $(this).text();
  }).get();
}

/**
 * Ordered list of scraping recipes. Each recipe has:
 *   - title:   human-readable name of the recipe,
 *   - pattern: regular-expression source (as a string); presumably the caller
 *              tests it against the page URL to pick a recipe — confirm there,
 *   - parse:   function($) that receives a jQuery-style selector function
 *              (presumably cheerio — confirm) and returns a plain object.
 *
 * Dots in host names are written as `\\.` so that the resulting regular
 * expression matches a literal dot instead of any character.
 */
var array = [
  {
    title: 'imdb',
    pattern: '(imdb)\\.com/title/(.*)/',
    parse: function($) {
      return {
        name: trim($('h1').text())
      };
    }
  },
  {
    title: 'ceneo list',
    pattern: 'ceneo\\.pl/([a-zA-Z_]+)',
    parse: function($) {
      return {
        products: $('.cat-prod-row').map(function() {
          var nameCell = $(this).find('.cat-prod-row-name');
          var href = nameCell.find('a').attr('href');
          // Only the leading "/<digits>" part of the link is kept. A row
          // without a usable link yields `url: null` instead of throwing and
          // aborting the whole list.
          var idPath = href ? href.match(/^\/\d+/) : null;
          return {
            name: trim(nameCell.text()),
            url: idPath ? 'http://www.ceneo.pl' + idPath[0] : null
          };
        }).get()
      };
    }
  },
  {
    title: 'ceneo page',
    pattern: '(ceneo)\\.pl/([0-9]+)',
    parse: function($) {
      // The reviews link text is split on CRLF: first line is read as the
      // vote count, second line as the review count.
      var counter = trim($('.product-reviews-link').text()).split('\r\n');
      var imageSrc = $('.product-pictures img').attr('src');
      return {
        name: $('h1').text(),
        // 'http:' is prepended to the src (presumably protocol-relative —
        // confirm); a missing image gives null rather than 'http:undefined'.
        image: imageSrc ? 'http:' + imageSrc : null,
        votes_count: toInt(counter[0]),
        reviews_count: toInt(counter[1]),
        breadcrumbs: trim($('.breadcrumbs dd').text()).split('\r\n').map(trim),
        score: parseFloat($('.product-score').text()),
        // The first comma is turned into a dot before parsing the price.
        price: parseFloat($('.price').eq(0).text().replace(',', '.')),
        //voting: $('.product-feature-voting').html(),
        reviews: $('.product-review').map(function() {
          var review = $(this);
          return {
            text: review.find('.product-review-body').text(),
            author: trim(review.find('.product-reviewer').text()),
            vote_yes: toInt(review.find('.vote-yes').text()),
            score: parseFloat(review.find('.review-score-count').text()),
            // True when the '.product-review-pz' element has any text.
            is_bought: !!review.find('.product-review-pz').text(),
            vote_no: toInt(review.find('.vote-no').text())
          };
        }).get()
      };
    }
  },
  {
    title: 'google search',
    pattern: 'google\\.com',
    parse: function($) {
      return {
        // NOTE(review): 'aa' is a placeholder value; no real link is
        // extracted from the '.g' elements yet.
        links: $('.g').map(function() {
          return {
            url: 'aa'
          };
        }).get()
      };
    }
  },
  {
    title: 'twitter profile',
    pattern: 'twitter\\.com/[a-zA-Z]+$',
    parse: function($) {
      return {
        name: S($('h1.ProfileHeaderCard-name').text()).trim().s,
        bio: $('.ProfileHeaderCard-bio.u-dir').text(),
        url: S($('.ProfileHeaderCard-urlText.u-dir').text()).trim().s
      };
    }
  },
  {
    title: 'github stargazers',
    //pattern: 'github\\.com/[a-zA-Z]+/[a-zA-Z-]+/stargazers\\?page\\=\\d',
    pattern: 'github\\.com/[a-zA-Z]+/[a-zA-Z-]+/stargazers',
    parse: function($) {
      return {
        stargazers: toCount($('#repos .counter').eq(0).text()),
        users: $('.follow-list-item').map(function() {
          var heading = $(this).find('h3');
          return {
            // NOTE(review): `image` holds the heading text, not an image
            // URL — confirm whether that is intended.
            image: heading.text(),
            url: 'https://www.github.com' + heading.find('a').attr('href'),
            info: $(this).find('.follow-list-info').text()
          };
        }).get()
      };
    }
  },
  {
    title: 'github repo',
    pattern: 'github\\.com/[a-zA-Z]+/[a-zA-Z-]+$',
    parse: function($) {
      var socialCounts = $('.social-count');
      return {
        // Every comma is stripped so that "1,234,567" parses as 1234567.
        watch: toCount(socialCounts.eq(0).text()),
        stars: toCount(socialCounts.eq(1).text()),
        forks: toCount(socialCounts.eq(2).text()),
        commits: toCount($('.commits').text())
      };
    }
  },
  {
    title: 'github profile',
    pattern: 'github\\.com/[a-zA-Z]+$',
    parse: function($) {
      var stats = $('.vcard-stat-count');
      var contributions = $('.contrib-number');
      return {
        name: $('.vcard-fullname').text(),
        bio: $('.user-profile-bio').text(),
        // NOTE(review): `username` reads the same selector as `name` —
        // confirm whether a dedicated username element was intended.
        username: $('.vcard-fullname').text(),
        email: $('.octicon-mail').next().text(),
        joined: S($('.octicon-clock').next().next().text()).trim().s,
        location: S($('.octicon-location').parent().text()).trim().s,
        url: $('.octicon-link').next().text(),
        organization: $('.octicon-organization').parent().text(),
        followers_count: toCount(stats.eq(0).text()),
        starred_count: toCount(stats.eq(1).text()),
        following_count: toCount(stats.eq(2).text()),
        contrib_last_year: contributions.eq(0).text(),
        longest_streak: contributions.eq(1).text()
      };
    }
  },
  {
    title: 'builtwith website',
    pattern: 'builtwith\\.com/.*$',
    parse: function($) {
      return {
        name: $('h1').text(),
        providers: $('.techItem').map(function() {
          var item = $(this);
          return {
            // Category comes from the nearest preceding '.titleBox' sibling.
            category: item.prevAll('.titleBox').first().find('.active').text(),
            name: item.find('a').eq(1).text()
          };
        }).get()
      };
    }
  },
  {
    title: 'filmweb movie',
    pattern: 'filmweb\\.pl/.*',
    parse: function($) {
      // One {key, value} pair per info-table row; when the cell contains a
      // list, the value is the array of list-item texts instead of a string.
      var info = $('.filmInfo tr').map(function() {
        var row = $(this);
        var value = row.find('td').text();
        if (row.find('td ul').length) {
          value = textsOf($, row.find('td ul li'));
        }
        return {key: row.find('th').text(), value: value};
      }).get();

      var actors = $('.filmCast tr').map(function() {
        var cells = $(this).find('td');
        return {
          name: cells.eq(1).text(),
          image: cells.eq(0).find('img').attr('src'),
          movie_name: cells.eq(3).text(),
          movie_image: cells.eq(4).find('img').attr('src')
        };
      }).get();
      // The first cast row is dropped (presumably a header row — confirm).
      actors = actors.slice(1);

      var votes = $(".afterPremiere span[property='v:votes']");
      return {
        name: $('.filmTitle').text(),
        year: $('.halfSize').text(),
        original_name: $('h1').parent().next().text(),
        info: info,
        url: $('.filmTitle a').attr('href'),
        small_image: $('.posterLightbox img').attr('src'),
        big_image: $('.posterLightbox a').attr('href'),
        time: $('.filmTime').text(),
        rating: $(".ratingInfo span[property='v:average']").text(),
        votes: votes.text(),
        wants_to_see: votes.closest('.afterPremiere').next().text(),
        actors: actors
      };
    }
  },
  {
    title: 'npmjs.js package',
    pattern: 'npmjs\\.com/package/(.*)$',
    parse: function($) {
      var publisher = $('.last-publisher');
      var linkLists = $('.list-of-links');
      var downloads = $('.box').eq(1).find('li');

      // `releases` is the number captured from "of N releases", or null
      // when the text does not contain that phrase.
      var releasesMatch = publisher.next().text().match(/of ([0-9]+) releases/);
      var releases = releasesMatch ? toInt(releasesMatch[1]) : null;

      var collaborators = $('.collaborators li a').map(function() {
        return $(this).attr('title');
      }).get();
      var avatars = $('.collaborators li').map(function() {
        return $(this).find('a img').attr('src');
      }).get();

      return {
        name: $('h1 a').eq(0).text(),
        short_description: $('.package-description').text(),
        //referer: result.uri,
        last_publisher: $('.last-publisher span').eq(0).text(),
        repo: publisher.next().next().find('a').attr('href'),
        version: publisher.next().find('strong').text(),
        tags: textsOf($, linkLists.eq(0).find('a')),
        dependencies: textsOf($, linkLists.eq(1).find('a')),
        releases: releases,
        collaborators: collaborators,
        //issues: $('#issues .enhanced a').text(),
        //pr: parseInt($('#pull_requests a').text(), 10),
        downloads_last_day: toCount(downloads.eq(0).find('strong').text()),
        downloads_last_week: toCount(downloads.eq(1).find('strong').text()),
        downloads_last_month: toCount(downloads.eq(2).find('strong').text()),
        avatars: avatars,
        license: publisher.next().next().next().find('a').text(),
        // Queried from the document: inside `parse`, `this` is not a DOM
        // element, so `$(this).find(...)` could never locate the date.
        published_at: $('.last-publisher span').eq(1).attr('data-date')
        //published: $(this).find('p.author span').eq(0).text()
      };
    }
  }
]
// The recipe list itself is the module's exported value.
module.exports = array;