hubot-scripts

Version:

Allows you to opt in to a variety of scripts

92 lines (70 loc) • 2.51 kB

text/coffeescript

# Description: # None # # Dependencies: # "htmlparser": "1.7.6" # "soupselect: "0.2.0" # # Configuration: # None # # Commands: # hubot wiki me <query> - Searches for <query> on Wikipedia. # # Author: # h3h _ = require("underscore") _s = require("underscore.string") Select = require("soupselect").select HTMLParser = require "htmlparser" module.exports = (robot) -> robot.respond /(wiki)( me)? (.*)/i, (msg) -> wikiMe msg, msg.match[3], (text, url) -> msg.send text msg.send url if url wikiMe = (msg, query, cb) -> articleURL = makeArticleURL(makeTitleFromQuery(query)) msg.http(articleURL) .header('User-Agent', 'Hubot Wikipedia Script') .get() (err, res, body) -> return cb "Sorry, the tubes are broken." if err if res.statusCode is 301 return cb res.headers.location if /does not have an article/.test body return cb "Wikipedia has no idea what you're talking about." paragraphs = parseHTML(body, "p") bodyText = findBestParagraph(paragraphs) or "Have a look for yourself:" cb bodyText, articleURL # Utility Methods childrenOfType = (root, nodeType) -> return [root] if root?.type is nodeType if root?.children?.length > 0 return (childrenOfType(child, nodeType) for child in root.children) [] findBestParagraph = (paragraphs) -> return null if paragraphs.length is 0 childs = _.flatten childrenOfType(paragraphs[0], 'text') text = (textNode.data for textNode in childs).join '' # remove parentheticals (even nested ones) text = text.replace(/\s*\([^()]*?\)/g, '').replace(/\s*\([^()]*?\)/g, '') text = text.replace(/\s{2,}/g, ' ') # squash whitespace text = text.replace(/\[[\d\s]+\]/g, '') # remove citations text = _s.unescapeHTML(text) # get rid of nasties # if non-letters are the majority in the paragraph, skip it if text.replace(/[^a-zA-Z]/g, '').length < 35 findBestParagraph(paragraphs.slice(1)) else text makeArticleURL = (title) -> "https://en.wikipedia.org/wiki/#{encodeURIComponent(title)}" makeTitleFromQuery = (query) -> strCapitalize(_s.trim(query).replace(/[ ]/g, '_')) parseHTML = (html, selector) -> handler = new HTMLParser.DefaultHandler((() ->), ignoreWhitespace: true ) parser = new HTMLParser.Parser handler parser.parseComplete html Select handler.dom, selector strCapitalize = (str) -> return str.charAt(0).toUpperCase() + str.substring(1);