UNPKG

sn-rlsnet

Version:

standart-n project

188 lines (152 loc) 5.28 kB
# Request handler: logs a start marker and replies with a fixed JSONP payload.
#
# req - incoming request object (not used by this handler).
# res - response object; only its `jsonp` method is called.
exports.index = (req, res) ->
  # The return value is not needed. Presumably this call installs the colour
  # helpers that make `'start'.data` available -- TODO confirm against
  # global.controls.lib.
  global.controls.lib.colors()
  console.log 'start'.data
  res.jsonp({ user: 'tobi' })


# Scrapes one alphabetical index page of rlsnet.ru and stores what it finds
# in the MySQL table `rlsnet`.
#
# Flow, as implemented below:
#   1. pick an index URL by `global.program.index` (0 when it is not set);
#   2. download it, decode it from win1251 and collect every `href="..."`
#      found inside the `tn_alf_list` section;
#   3. for each link that has no row in `rlsnet` yet, download the page,
#      derive a caption plus a list of names, and insert one row per name.
#
# Reads `global.dbsettings.connections.mysql` (host, login, password, dbname)
# and `global.program.index`. Takes no arguments.
#
# NOTE(review): the MySQL connection opened here is not closed anywhere in
# this function.
exports.get = () ->

  colors = require('colors')
  sanitize = require('validator').sanitize
  iconv = require('iconv-lite')
  get = require 'get'
  mysql = require 'mysql'

  # Theme names become string helpers used for log output below
  # (`.data`, `.info`, `.debug`, `.error`).
  colors.setTheme
    silly: 'rainbow'
    input: 'grey'
    verbose: 'cyan'
    prompt: 'grey'
    info: 'green'
    data: 'grey'
    help: 'cyan'
    warn: 'yellow'
    debug: 'blue'
    error: 'red'

  # Inserts one (caption, name, link) row into `rlsnet`.
  # `index` and `url` are the values chosen at the bottom of this function.
  insertIntoBase = (caption, name, link) ->
    values =
      caption: caption
      name: name
      ind: index
      url: url
      link: link
    cn.query 'insert into rlsnet set ?', values, (err) ->
      if err
        # Previously a failed insert was dropped without any trace.
        console.log 'insert failed'.error, err
      else
        console.log caption.data, name.info

  # Extracts the caption text from a page: keeps what follows the `div_nest`
  # container, then the text between the first `</h2>` and the next `<h2>`,
  # drops the first <img> and the first empty <a></a>, and finally strips
  # entity remnants via clearTags.
  getCaption = (value) ->
    value = clearText(value)
    value = value.replace(/(.*?)<div id="div_nest">(.*?)$/,'$2')
    value = value.replace(/(.*?)<h2>(.*?)<\/h2>(.*?)<h2>(.*)/i,'$3')
    value = value.replace(/<img(.*?)>/,'')
    value = value.replace(/<a(.*?)><\/a>/,'')
    value = clearTags(value)
    value

  # Returns the text of the last anchor in `value`, passed through the
  # validator `escape` / `entityEncode` sanitizers and clearTags.
  # Returns `res` (null by default) when `value` contains no anchor.
  getName = (value, res = null) ->
    if value.match(/<a(.*?)>(.*?)<\/a>/i)
      res = value.replace(/(.*)<a(.*?)>(.*?)<\/a>(.*)/i,'$3')
      res = sanitize(res).escape()
      res = sanitize(res).entityEncode()
      res = clearTags(res)
    res

  # Returns every `<td class="rest_data" ...</td>` fragment that follows the
  # `tblpanel` table header, or `res` (an empty list by default) when there
  # is none. The original overwrote the default with null in that case.
  getNames = (value, res = []) ->
    value = clearText(value)
    value = value.replace(/(.*?)<table border="0" cellspacing="0" cellpadding="0" width="100%" class="rest_nest" id="tblpanel">(.*?)$/,'$2')
    value.match(/<td class="rest_data"(.*?)<\/td>/gi) ? res

  # Invokes `callback` only when no row with a positive id exists for `link`.
  checkLink = (link, callback) ->
    return unless link?
    cn.query 'select * from rlsnet where link = ?', link, (err, rows) ->
      if err
        # A failed lookup used to look exactly like "link not stored yet"
        # and triggered a scrape; report it and skip the link instead.
        console.log 'select failed'.error, err
        return
      known = rows? and rows[0]? and rows[0].id > 0
      callback() if callback and not known

  # Removes line feeds, carriage returns and tabs so that the single-line
  # regular expressions above can span the whole document.
  # null/undefined input is returned unchanged.
  clearText = (text) ->
    if text?
      text = text.replace(/\n/gi,'')
      text = text.replace(/\r/gi,'')
      text = text.replace(/\t/gi,'')
    text

  # Strips a fixed list of HTML entity names and <sup> tags.
  # NOTE(review): `&` and `;` are optional in these patterns, so bare
  # substrings such as `amp`, `lt`, `gt`, `reg`, `trade` or `sup` are removed
  # from ordinary text as well -- confirm this is intended before tightening.
  clearTags = (text) ->
    if text?
      text = text.replace(/\&?amp\;?/gi,'')
      text = text.replace(/\&?nbsp\;?/gi,' ')
      text = text.replace(/\&?raquo\;?/gi,'')
      text = text.replace(/\&?laquo\;?/gi,'')
      text = text.replace(/\&?BO-C\;?/gi,'')
      text = text.replace(/\&?lt\;?/gi,'')
      text = text.replace(/\&?gt\;?/gi,'')
      text = text.replace(/\&?trade\;?/gi,'')
      text = text.replace(/<?\/?sup>?/gi,'')
      text = text.replace(/\&?reg\;?/gi,'')
    text

  # Stores one row per name found on `page`, all sharing the page caption.
  parsePage = (page, link = '') ->
    return unless page?
    caption = getCaption(page)
    names = getNames(page)
    return unless names?
    # forEach instead of filter: the iteration is done for its side effects.
    names.forEach (value) ->
      name = getName value
      if name? and caption?
        insertIntoBase(caption, name, link)

  # Downloads `link`, decodes it from win1251 and hands it to parsePage.
  getPage = (link) ->
    return unless link?
    get(link).asBuffer (err, b) ->
      if err
        # Previously a failed download was dropped without any trace.
        console.log 'fetch failed'.error, link, err
        return
      parsePage iconv.decode(b, 'win1251'), link

  # Takes raw `href="..."` matches, extracts each target and scrapes the
  # ones checkLink reports as not stored yet.
  getLinks = (links) ->
    return unless links?
    # forEach gives every iteration its own `link` binding for the callback.
    links.forEach (value) ->
      link = value.replace(/href="(.*?)"/i,'$1')
      checkLink link, ->
        console.log 'link'.data, link.debug
        getPage link

  # Returns the index-page URL at position `i`, or undefined when `i` is
  # outside the list.
  urls = (i = 0) ->
    def = [
      'http://www.rlsnet.ru/mnn_alf_letter_2.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_C0.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_C1.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_C2.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_C3.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_C4.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_C6.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_C7.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_C8.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_C9.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_CA.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_CB.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_CC.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_CD.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_CE.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_CF.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_D0.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_D1.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_D2.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_D3.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_D4.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_D5.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_D6.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_D7.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_D8.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_DD.htm'
      'http://www.rlsnet.ru/mnn_alf_letter_DF.htm'
    ]
    def[i]

  if global.dbsettings?
    settings = global.dbsettings.connections.mysql
    cn = mysql.createConnection
      host: settings.host
      user: settings.login
      password: settings.password
      database: settings.dbname
    cn.connect()

  # Without a connection every later `cn.query` would throw inside an
  # asynchronous callback, so stop before any page is downloaded.
  unless cn?
    console.log 'database settings are missing'.error
    return

  index = global.program?.index ? 0
  url = urls index
  return unless url?

  console.log url.debug
  get(uri: url).asBuffer (err, b) ->
    if err
      # The original decoded `b` before looking at `err`, so a failed
      # download was decoded instead of being reported.
      console.log 'fetch failed'.error, url, err
      return
    data = iconv.decode(b, 'win1251')
    console.log 'parsing...'.data
    div = clearText(data)
    div = div.replace(/(.*?)<div class="tn_alf_list">(.*?)<div class="new_sub_slices">(.*)/i,'$2')
    getLinks div.match(/href="(.*?)"/gi)