website-to-json
Version:
Converts all websites to JSON data
69 lines (58 loc) • 1.3 kB
JavaScript
var S = require('string')
var _ = require('lodash')
var trim = require('trim')
var Promise = require('bluebird')
var wtj = require('./index')
/**
 * Custom extraction recipes. Only referenced by the disabled
 * `wtj.extractUrl(..., {recipes: recipes})` example further down this file.
 *
 * Each recipe has:
 *   - title:   label for the recipe
 *   - pattern: regular-expression source as a string (presumably compiled and
 *              matched against the URL by the library -- TODO confirm)
 *   - parse:   function that builds the result object from the loaded page
 */
var recipes = [{
  title: 'imdb',
  // The backslash is doubled on purpose: inside a string literal '\.' collapses
  // to '.', which as a regex matches ANY character (e.g. "imdbXcom").
  pattern: '(imdb)\\.com/title/(.*)/',
  /**
   * @param {Function} $ - selector function, called as $('h1').text()
   *   (presumably a cheerio/jQuery-like handle -- TODO confirm).
   * @returns {{name: string}} trimmed text of the page's <h1> elements.
   */
  parse: function($) {
    return {
      name: trim($('h1').text()),
    };
  },
}];
var Nightmare = require('nightmare');
// Single Nightmare instance, passed as the `nightmare` option to the
// wtj.extractUrl calls below. `show: true` is Nightmare's documented option
// for making the browser window visible while it runs.
var nightmareOptions = {show: true};
var nightmare = Nightmare(nightmareOptions);
/*wtj.extractUrl('http://www.imdb.com/title/tt0111161/', {
fields: ['data', 'meta', 'keywords'],
keywords: ['moments'],
recipes: recipes
})
.then(function(res) {
console.log(res);
})*/
// Extract each site with the shared Nightmare instance and print the result.
// `Promise` is bluebird here (see the require at the top of the file), so
// Promise.map and its `concurrency` option are available; concurrency 1 means
// at most one extractUrl call is in flight at a time.
//
// NOTE(review): the Nightmare instance is never ended (the only
// `nightmare.end()` in this file is commented out), so the process may stay
// alive after the last site -- confirm whether that is intentional.
Promise.map(['onet.pl', 'wp.pl', 'interia.pl', 'gazeta.pl'], function(url) {
  return wtj.extractUrl(url, {
    nightmare: nightmare,
    fields: ['data', 'meta', 'social'],
  })
  .then(function(res) {
    console.log(res);
  });
}, {concurrency: 1})
// Terminal handler so a failed extraction is reported instead of leaving the
// chain's rejection unhandled. The first failure still stops the run.
.catch(function(err) {
  console.error(err);
});
/*wtj.extractUrl('coursera.com', {
nightmare: nightmare,
fields: ['data', 'meta', 'social'],
})
/*wtj.extractUrl('coursera.com', {
nightmare: nightmare,
fields: ['data', 'meta', 'social'],
})
.then(function(res) {
console.log(res);
wtj.extractUrl('onet.pl', {
nightmare: nightmare,
fields: ['data', 'meta', 'social'],
})
.then(function(res) {
console.log(res);
})
//nightmare.end()
})*/