undertherules

Track, find, mine, parse & analyze data from social networks.

# cluster.coffee

# Modules
_ = require('lodash')
fs = require('fs-extra')
os = require('os')
kue = require('kue')
pug = require('pug')
url = require('url')
http = require('http')
shoe = require('shoe')
dnode = require('dnode')
level = require('levelup')
crypto = require('crypto')
stylus = require('stylus')
natural = require('natural')
cluster = require('cluster')
request = require('request')
coffeeify = require('coffeeify')
browserify = require('browserify')
querystring = require('querystring')
child_process = require('child_process')

# Functions
{exec} = child_process
{writeFileSync, readFileSync} = fs
{removeSync, mkdirsSync, copySync, ensureDirSync} = fs

# Environment
numCPUs = os.cpus().length
{CORE_DIR, LEVEL_DIR, STATIC_DIR, HTDOCS_DIR, USER_AGENT} = process.env
{KUE_PORT, KUE_HOST, PANEL_PORT, PANEL_HOST, IG_COOKIE} = process.env

# Files
browserCoffee = "#{HTDOCS_DIR}/browser.coffee"
clusterCoffee = "#{CORE_DIR}/cluster.coffee"
staticJs = "#{STATIC_DIR}/js"
staticImg = "#{STATIC_DIR}/img"
staticFaviconIco = "#{STATIC_DIR}/favicon.ico"
indexHtml = "#{STATIC_DIR}/index.html"
styleCss = "#{STATIC_DIR}/style.css"
bundleJs = "#{STATIC_DIR}/bundle.js"
htdocsJs = "#{HTDOCS_DIR}/js"
htdocsImg = "#{HTDOCS_DIR}/img"
htdocsFaviconIco = "#{HTDOCS_DIR}/img/favicon.ico"
templatePug = "#{HTDOCS_DIR}/template.pug"
styleStyl = "#{HTDOCS_DIR}/style.styl"

# Queue
queue = kue.createQueue()

# Cluster
class Cluster
  # The capture group keeps the matched URLs in the token stream
  # (see @inputMessage below).
  @tokenizer: new natural.RegexpTokenizer({pattern:/(https?:\/\/[^\s]+)/g})

  @dnodeSingUp: (guid, cb) ->
    console.log("PID: #{process.pid}\t{#{guid}}\t@dnodeSingUp")
    if typeof cb isnt 'function'
      return
    if not guid? then return cb('Error!')
    graphId = crypto.createHash('md5').update("#{guid}").digest('hex')
    value = {
      graphId:graphId
      guid:guid
      timestamp:"#{new Date()}"
      ready:0
    }
    graph.put(graphId, JSON.stringify(value), (err) ->
      if not err then cb(null, value) else cb(new Error(err))
    )

  @dnodeSingIn: (graphId, passwd, cb) ->
    if typeof cb isnt 'function'
      return
    console.log("PID: #{process.pid}\t[#{graphId}]\t@dnodeSingIn")
    graph.get(graphId, (err, list) ->
      if err
        cb('ACCESS DENIED')
      else if list
        cb(null, JSON.parse(list))
      else
        cb('ACCESS DENIED')
    )

  @dnodeUpdate: (graphId, cb) ->
    console.log("PID: #{process.pid}\t[#{graphId}]\t@dnodeUpdate")
    if graphId
      count = 0
      Log = level(LEVEL_DIR + "/#{graphId}-log") # , {type:'json'}
      Log.createReadStream()
        .on('data', (data) ->
          if data.key and data.value
            count += 1
            cb(data)
        )
        .on('error', (err) ->
          cb({key:"#{new Date()}", value:"Oh my! #{err}"})
        )
        .on('close', ->
          cb({key:'count', value:count})
        )
        .on('end', ->
          Log.close()
        )

  @inputMessage: (graphId, msg, cb) =>
    if graphId and msg
      console.log("PID: #{process.pid}\t[#{graphId}]\t@inputMessage")
      Log = level(LEVEL_DIR + "/#{graphId}-log")
      logKey = crypto.createHash('md5').update(msg).digest('hex')
      Log.put(logKey, msg, (err) ->
        if err then console.log('Ooops!', err)
        Log.close()
      )
      rawArray = @tokenizer.tokenize(msg)
      rawlinks = (url.parse(link) for link in rawArray)
      links = (link.href for link in rawlinks when link.hostname?)
      for item in links
        queue.create('mediaAnalyzer', {
          title:"Media Analyzer. GraphID: #{graphId}."
          graphId:graphId
          itemUrl:item
        }).save()
        cb(item)
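  # A sketch of what @tokenizer does with a message (hypothetical input, run
  # in a REPL rather than in this module). natural's RegexpTokenizer splits
  # on the pattern by default, and the capture group keeps each matched URL
  # in the result, so the hostname filter in @inputMessage can pick them out:
  #
  #   tokenizer = new natural.RegexpTokenizer({pattern:/(https?:\/\/[^\s]+)/g})
  #   tokenizer.tokenize('look at https://www.instagram.com/p/xyz/ today')
  #   # => ['look at ', 'https://www.instagram.com/p/xyz/', ' today']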
  @mediaAnalyzer: (job, done) ->
    {graphId, itemUrl} = job.data
    command = "curl -X GET '#{itemUrl}/?__a=1' --verbose "
    command += "--user-agent #{USER_AGENT} --cookie #{IG_COOKIE} "
    command += "--cookie-jar #{IG_COOKIE}"
    exec(command, (error, stdout, stderr) ->
      if stdout
        console.log("Received #{stdout.length} bytes.")
        data = JSON.parse(stdout)
        {id, follows_viewer, is_private, username} = data.user
        Users = level(LEVEL_DIR + "/#{graphId}-ig-users", {type:'json'})
        Users.put(id, data.user, {valueEncoding:'json'}, (err) ->
          if err then console.log('Ooops!', err)
          Users.close()
        )
        Nodes = level(LEVEL_DIR + "/#{graphId}-ig-nodes", {type:'json'})
        data.user.color = '#000000'
        Nodes.put("#{id}", data.user, {valueEncoding:'json'}, (err) ->
          if err then console.log('Ooops!', err)
          Nodes.close()
        )
        console.log("#{is_private}#{follows_viewer}")
        # A private account that does not follow the viewer is unreadable;
        # skip it, but still reach done() below so the job completes.
        if not (is_private is true and follows_viewer is false)
          queue.create('igConnections', { # Followers
            title:'Get Instagram Followers',
            query_id:'17851374694183129',
            after:null,
            first:20,
            id:id,
            graphId:graphId,
            userName:username
          }).delay(5).save()
      else
        console.log(error)
      if stderr
        console.error(stderr)
      done()
    )

  @igConnections: (job, done) ->
    {graphId, id, query_id, first, after, userName} = job.data
    console.log("PID: #{process.pid}\t[#{graphId}]\t@igConnections")
    params = {query_id:query_id, after:after, first:first, id:id}
    igUrl = 'https://www.instagram.com/graphql/query/'
    igUrl += "?#{querystring.stringify(params)}"
    command = "curl -X GET '#{igUrl}' --verbose "
    command += "--user-agent #{USER_AGENT} --cookie #{IG_COOKIE} "
    command += "--cookie-jar #{IG_COOKIE}"
    console.log('command:', command)
    exec(command, (error, stdout, stderr) ->
      if stdout
        console.log("Received #{stdout.length} bytes.")
        queue.create('igSave', {
          title:"Save Instagram: #{query_id}.",
          jsonData:stdout,
          query_id:query_id,
          id:id,
          graphId:graphId,
          userName:userName
        }).delay(5).save()
      else
        console.log(error)
      done()
    )

  @igSave: (job, done) ->
    {graphId, id, jsonData, query_id, userName} = job.data
    console.log("PID: #{process.pid}\t[#{graphId}]\t@igSave")
    {edge_follow, edge_followed_by} = JSON.parse(jsonData).data.user
    {page_info, edges} = edge_follow or edge_followed_by
    {has_next_page, end_cursor} = page_info
    # flag is true for a followers page, false for a following page.
    flag = edge_followed_by?
    if flag
      query_id = '17851374694183129'
    else
      query_id = '17874545323001329'
    queue.create('igSaveArray', {
      title:"Save Array Instagram: GraphID: #{id}.",
      flag:flag,
      edges:edges,
      query_id:query_id,
      after:end_cursor,
      first:20,
      id:id,
      graphId:graphId,
      userName:userName,
      has_next_page:has_next_page,
      end_cursor:end_cursor
    }).delay(5).save()
    done()
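  # For reference, @igSave above touches only this slice of the Instagram
  # GraphQL response. The field names come from the destructuring; the
  # values are made-up examples:
  #
  #   {
  #     "data": {
  #       "user": {
  #         "edge_followed_by": {            # or "edge_follow"
  #           "page_info": {"has_next_page": true, "end_cursor": "AQA..."},
  #           "edges": [{"node": {"id": "123", "username": "someone"}}]
  #         }
  #       }
  #     }
  #   }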
  @igSaveArray: (job, done) ->
    {graphId, flag, edges, id, userName, has_next_page, end_cursor} = job.data
    {query_id} = job.data
    console.log("PID: #{process.pid}\t[#{graphId}]\t@igSaveArray")
    # Followers point at this account; followed accounts are pointed to by it.
    if flag then target = id else source = id
    nodesArray = ({
      type:'put',
      key:"#{e.node.id}",
      value:e.node,
      valueEncoding:'json'
    } for e in edges)
    edgesArray = ({
      type:'put',
      key:"#{source or e.node.id}-#{target or e.node.id}",
      value:{
        id:"#{source or e.node.id}-#{target or e.node.id}",
        source:"#{source or e.node.id}",
        target:"#{target or e.node.id}"
      },
      valueEncoding:'json'
    } for e in edges)
    console.log("PID: #{process.pid}\t[#{graphId}]\t@igSaveArray\tEdges")
    Edges = level(LEVEL_DIR + "/#{graphId}-ig-edges", {type:'json'})
    Edges.batch(edgesArray, (err) ->
      if err then console.log('Ooops!', err)
      console.log("PID: #{process.pid}\t[#{graphId}]\tEdges\t[OK]")
      Edges.close()
      console.log("PID: #{process.pid}\t[#{graphId}]\t@igSaveArray\tNodes")
      Nodes = level(LEVEL_DIR + "/#{graphId}-ig-nodes", {type:'json'})
      Nodes.batch(nodesArray, (err) ->
        if err then console.log('Ooops!', err)
        console.log("PID: #{process.pid}\t[#{graphId}]\tNodes\t[OK]")
        Nodes.close()
        if has_next_page
          queue.create('igConnections', {
            title:"Get Instagram: #{query_id}.",
            query_id:query_id,
            after:end_cursor,
            first:20,
            id:id,
            graphId:graphId,
            userName:userName
          }).delay(5).save()
        else
          queue.create('igSaveJson', {
            title:"Get Instagram: #{query_id}.",
            graphId:graphId,
            query_id:query_id,
            id:id,
            userName:userName
          }).delay(5).save()
        done()
      )
    )

  @igSaveJson: (job, done) ->
    {graphId, query_id, id, userName} = job.data
    ig = id
    console.log("PID: #{process.pid}\t[#{graphId}]\t@igSaveJson\t[#{query_id}]")
    if query_id isnt '17874545323001329'
      queue.create('igConnections', { # Following
        title:'Get Instagram Following',
        query_id:'17874545323001329',
        after:null,
        first:20,
        id:id,
        graphId:graphId,
        userName:userName
      }).delay(5).save()
      done()
    if query_id is '17874545323001329'
      # console.log('\nsock:', sock, '\n')
      graphDone = 0
      graphJson = {
        nodes:[]
        edges:[]
      }
      Nodes = level(LEVEL_DIR + "/#{graphId}-ig-nodes", {type:'json'})
      nodeCount = -1
      edgeCount = -1
      nodeHash = {}
      Nodes.createReadStream()
        .on('data', (data) ->
          {id, username, color} = JSON.parse(data.value)
          if not color? then color = '#ec5148'
          nodeCount += 1
          nodeHash["#{id}"] = "n#{nodeCount}"
          graphJson.nodes.push({
            id:nodeHash["#{id}"],
            ig:id,
            label:username,
            x:Math.floor(Math.random() * (2000 - 1) + 1),
            y:Math.floor(Math.random() * (2000 - 1) + 1),
            size:Math.floor(Math.random() * (10 - 1) + 1),
            color:color
          })
          console.log('[Nodes]', 'nodeHash:', nodeHash["#{id}"], 'id:', id)
        )
        .on('error', (err) ->
          console.log('[Nodes] Oh my!', err)
        )
        .on('close', ->
          console.log('[Nodes] Stream closed')
          Edges = level(LEVEL_DIR + "/#{graphId}-ig-edges", {type:'json'})
          Edges.createReadStream()
            .on('data', (data) ->
              {source, target} = JSON.parse(data.value)
              edgeCount += 1
              console.log('[Edges] source', source, nodeHash[source])
              console.log('[Edges] target', target, nodeHash[target])
              graphJson.edges.push({
                id:"e#{edgeCount}",
                source:nodeHash["#{source}"],
                target:nodeHash["#{target}"]
              })
            )
            .on('error', (err) ->
              console.log('[Edges] Oh my!', err)
            )
            .on('close', ->
              console.log('[Edges] Stream closed')
              _json = JSON.stringify(graphJson, null, 2)
              _jsonName = "#{STATIC_DIR}/files/#{graphId}.json"
              fs.writeFile(_jsonName, _json, 'utf8', (err) ->
                if err then console.log(err) else console.log(_jsonName)
              )
            )
            .on('end', ->
              console.log('[Edges] Stream ended')
              Edges.close()
              done()
            )
        )
        .on('end', ->
          console.log('[Nodes] Stream ended')
          Nodes.close()
        )
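  # The #{graphId}.json written above uses a plain node/edge layout, the shape
  # sigma.js-style graph renderers accept. A made-up example with one node and
  # one edge (x/y fall in 1..2000, size in 1..10, per the Math.random calls):
  #
  #   {
  #     "nodes": [{"id":"n0", "ig":"123", "label":"someone",
  #                "x":42, "y":7, "size":3, "color":"#ec5148"}],
  #     "edges": [{"id":"e0", "source":"n0", "target":"n1"}]
  #   }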
  @browserify: (job, done) ->
    console.log("PID: #{process.pid}\t@browserify")
    {browserCoffee, bundleJs} = job.data
    bundle = browserify({extensions:['.coffee.md']})
    bundle.transform(coffeeify, {
      bare:false
      header:false
    })
    bundle.add(browserCoffee)
    bundle.bundle((error, js) ->
      throw error if error?
      writeFileSync(bundleJs, js)
      done()
    )

  @coffeelint: (job, done) ->
    console.log("PID: #{process.pid}\t@coffeelint")
    {files} = job.data
    command = 'coffeelint ' + "#{files.join(' ')}"
    exec(command, (err, stdout, stderr) ->
      console.log(stdout, stderr)
      done()
    )

  @pugRender: (job, done) ->
    console.log("PID: #{process.pid}\t@pugRender")
    {templatePug, indexHtml} = job.data
    writeFileSync(indexHtml, pug.renderFile(templatePug, {pretty:true}))
    done()

  @static: (job, done) ->
    console.log("PID: #{process.pid}\t@static")
    {htdocsFaviconIco, staticFaviconIco, htdocsImg, staticImg} = job.data
    mkdirsSync(job.data.STATIC_DIR)
    mkdirsSync("#{job.data.STATIC_DIR}/files")
    copySync(htdocsJs, staticJs)
    copySync(htdocsImg, staticImg)
    copySync(htdocsFaviconIco, staticFaviconIco)
    done()

  @stylusRender: (job, done) ->
    console.log("PID: #{process.pid}\t@stylusRender")
    {styleStyl, styleCss} = job.data
    handler = (err, css) ->
      if err then throw err
      writeFileSync(styleCss, css)
    content = readFileSync(styleStyl, {encoding:'utf8'})
    stylus.render(content, handler)
    done()
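# Configuration is read from process.env (see the "Environment" block at the
# top of this file). All of these must be set before launch; the values below
# are illustrative assumptions, not documented defaults:
#
#   CORE_DIR     ./core         directory holding cluster.coffee
#   LEVEL_DIR    ./db           where the LevelDB stores are created
#   STATIC_DIR   ./static       build output served by ecstatic
#   HTDOCS_DIR   ./htdocs       sources: browser.coffee, template.pug, style.styl
#   KUE_PORT     3000           Kue dashboard port
#   KUE_HOST     127.0.0.1      Kue dashboard host
#   PANEL_PORT   8080           panel (ecstatic + dnode) port
#   PANEL_HOST   127.0.0.1      panel host
#   USER_AGENT   'Mozilla/5.0'  passed to curl via --user-agent
#   IG_COOKIE    ig.txt         cookie file curl reads/writes (--cookie/--cookie-jar)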
# Master
if cluster.isMaster
  ## Kue
  kue.app.set('title', 'Under The Rules')
  kue.app.listen(KUE_PORT, KUE_HOST, ->
    console.log("Kue: http://#{KUE_HOST}:#{KUE_PORT}.")
    # Remove completed jobs left over from previous runs.
    kue.Job.rangeByState('complete', 0, 100000, 'asc', (err, jobs) ->
      jobs.forEach((job) ->
        job.remove( ->
          return
        )
      )
    )
  )

  ## Ecstatic is a simple static-file server middleware.
  ecstatic = require('ecstatic')(STATIC_DIR)
  server = http.createServer(ecstatic) # Create an HTTP server.

  ## Start dnode over shoe and install the endpoint.
  server.listen(PANEL_PORT, PANEL_HOST, ->
    console.log("Dnode: http://#{PANEL_HOST}:#{PANEL_PORT}")
  )
  graph = level(LEVEL_DIR + '/graph', {type:'json'})
  sock = shoe((stream) ->
    # Define the API object exposed to the browser via dnode.
    d = dnode({
      dnodeUpdate:Cluster.dnodeUpdate
      dnodeSingUp:Cluster.dnodeSingUp
      dnodeSingIn:Cluster.dnodeSingIn
      inputMessage:Cluster.inputMessage
    })
    d.pipe(stream).pipe(d)
  )
  sock.install(server, '/dnode')
  ensureDirSync(LEVEL_DIR)

  ## Create jobs
  staticJob = queue.create('static', {
    title:'Copy images from HTDOCS_DIR to STATIC_DIR',
    STATIC_DIR:STATIC_DIR,
    htdocsFaviconIco:htdocsFaviconIco,
    staticFaviconIco:staticFaviconIco,
    htdocsImg:htdocsImg
    staticImg:staticImg
  }).save()
  staticJob.on('complete', ->
    queue.create('pugRender', {
      title:'Render (transform) the pug template to html',
      templatePug:templatePug,
      indexHtml:indexHtml
    }).delay(1).save()
    queue.create('stylusRender', {
      title:'Render (transform) the stylus template to css',
      styleStyl:styleStyl,
      styleCss:styleCss
    }).delay(1).save()
    queue.create('browserify', {
      title:'Render (transform) the coffee sources to js',
      browserCoffee:browserCoffee,
      bundleJs:bundleJs
    }).delay(1).save()
    queue.create('coffeelint', {
      title:'Lint coffee files',
      files:[clusterCoffee, browserCoffee]
    }).delay(1).save()
  )

  ## **Clean** up on exit (TODO: move this into the class).
  exitHandler = (options, err) ->
    if err
      console.log(err.stack)
    if options.exit
      process.exit()
      return
    if options.cleanup
      console.log('Bye!')
      removeSync(STATIC_DIR)

  ## Run the handler when the app closes, on Ctrl+C, and on uncaught exceptions.
  process.on('exit', exitHandler.bind(null, {cleanup:true}))
  process.on('SIGINT', exitHandler.bind(null, {exit:true}))
  process.on('uncaughtException', exitHandler.bind(null, {exit:true}))

  # Fork numCPUs - 1 workers; the master occupies the remaining core.
  i = 1
  while i < numCPUs
    cluster.fork()
    i += 1

# Worker
else
  queue.process('static', Cluster.static)
  queue.process('pugRender', Cluster.pugRender)
  queue.process('stylusRender', Cluster.stylusRender)
  queue.process('browserify', Cluster.browserify)
  queue.process('mediaAnalyzer', Cluster.mediaAnalyzer)
  queue.process('igSave', Cluster.igSave)
  queue.process('igSaveJson', Cluster.igSaveJson)
  queue.process('igSaveArray', Cluster.igSaveArray)
  queue.process('igConnections', Cluster.igConnections)
  queue.process('coffeelint', Cluster.coffeelint)
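# A matching browser-side client is a few lines of dnode/shoe plumbing. This
# is a sketch under assumptions: the real client is compiled from
# HTDOCS_DIR/browser.coffee by the 'browserify' job above, but its contents
# are not shown here; only the '/dnode' route and the method names are taken
# from the master branch:
#
#   shoe = require('shoe')
#   dnode = require('dnode')
#   d = dnode()
#   d.on('remote', (remote) ->
#     remote.dnodeSingUp('some-guid', (err, value) ->
#       console.log(err, value) # value carries graphId, guid, timestamp, ready
#     )
#   )
#   stream = shoe('/dnode')
#   d.pipe(stream).pipe(d)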