UNPKG

u-wave-parse-chat-markup

Version:

Abstract chat parser for üWave client applications.

205 lines (202 loc) 7.38 kB
/**
 * Adapted from https://github.com/kevva/url-regex.
 *
 * Builds a global, case-insensitive RegExp that matches URLs anywhere in a
 * string. A URL must start with either a scheme (`foo://`) or `www.`.
 *
 * @return {RegExp} A fresh RegExp instance (flags `ig`).
 */
function urlRegex() {
  const protocol = '(?:[a-z]+://)';
  const auth = '(?:\\S+(?::\\S*)?@)?';
  const host = '(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)';
  const domain = '(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*';
  const tld = '(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))\\.?';
  const port = '(?::\\d{2,5})?';
  const path = '(?:[/?#][^\\s"]*)?';
  const regex = `(?:${protocol}|www\\.)${auth}(?:localhost|${host}${domain}${tld})${port}${path}`;
  return new RegExp(regex, 'ig');
}

/**
 * RegExp that matches a URL at the start of the input.
 *
 * Deliberately not global, so `.exec()` is stateless and the instance can be
 * shared between calls.
 */
const linkRx = new RegExp(`^${urlRegex().source}`, 'i');

/**
 * RegExp that matches an `:emoji_name:` shortcode at the start of the input.
 *
 * The `-` is placed last in the character class so it is a literal dash. When
 * written as `+-~` it forms a range that also contains `:`, which makes
 * adjacent shortcodes such as `:a::b:` match as a single name.
 */
const emojiRx = /^:([A-Za-z0-9_+~-]+):/;

/**
 * Numeric token types produced by `tokenize()`.
 */
const TokenType = Object.freeze({
  Italic: 0,
  Bold: 1,
  Code: 2,
  Strike: 3,
  Emoji: 4,
  Mention: 5,
  Link: 6,
  Text: 7,
});

/**
 * Memoize a single-argument function.
 *
 * Remembers one (input, output) pair, and uses === equality to check if the
 * argument has changed.
 *
 * @param {function(*): *} fn The function to memoize.
 * @return {function(*): *} The memoized function.
 */
function memoize(fn) {
  // Track "has a cached value" explicitly, so that a first call with
  // `undefined` still invokes `fn` instead of returning an unset cache slot.
  let hasCached = false;
  let lastArg;
  let lastReturn;
  return (arg) => {
    if (!hasCached || arg !== lastArg) {
      // Compute first: if `fn` throws, the previous cache entry stays intact.
      const result = fn(arg);
      lastArg = arg;
      lastReturn = result;
      hasCached = true;
    }
    return lastReturn;
  };
}

/**
 * Escape `str` for use in a regular expression. The resulting snippet will
 * match the input string.
 *
 * @param {string} str
 * @return {string}
 */
function escapeStringRegExp(str) {
  return str.replace(/[|\\{}()[\]^$+*?.]/g, '\\$&');
}

/**
 * Create a token object.
 *
 * @param {number} type One of the `TokenType` values.
 * @param {string} text The token's content.
 * @param {string} [raw] The source text of the token. Defaults to `text`.
 * @return {{type: number, text: string, raw: string}}
 */
function createToken(type, text, raw = text) {
  return { type, text, raw };
}

/**
 * Sort users by username length. Longest usernames first, so that the
 * alternation built from them prefers the longest matching name.
 *
 * Returns a sorted copy; the input array is not modified.
 */
const sortMentions = memoize((mentions) => (
  mentions.slice().sort((a, b) => b.length - a.length)
));

/**
 * Build a case-insensitive RegExp that matches any of the given names at the
 * start of the input, followed by a word boundary, whitespace, a non-word
 * character or the end of the input. The matched name is capture group 1.
 */
const makeMentionRegExp = memoize((mentions) => {
  const names = mentions.map((mention) => escapeStringRegExp(mention)).join('|');
  return new RegExp(`^(${names})(?:\\b|\\s|\\W|$)`, 'i');
});

/**
 * Case-insensitively get the correct emoji name from the possible emoji for an
 * input string.
 *
 * @param {Array.<string>} names All possible emoji names.
 * @param {string} match The input string.
 * @return {string|null} The correct emoji name (including casing), or `null` if
 *    the requested emoji does not exist.
 */
function findEmoji(names, match) {
  const compare = match.toLowerCase();
  for (const name of names) {
    if (name.toLowerCase() === compare) {
      return name;
    }
  }
  return null;
}

/**
 * Split `text` into a flat list of tokens.
 *
 * @param {string} text The message text.
 * @param {{mentions: (Array.<string>|undefined), emojiNames: (Array.<string>|undefined)}} options
 *    `mentions` lists the names that may be @-mentioned. `emojiNames`, when
 *    given, restricts emoji tokens to names from that list.
 * @return {Array.<{type: number, text: string, raw: string}>}
 */
function tokenize(text, options) {
  // `i` is the absolute offset into `text`; `chunk` is always `text.slice(i)`.
  let chunk;
  let i = 0;
  const mentions = sortMentions(options.mentions || []);
  const mentionRx = makeMentionRegExp(mentions);
  const tokens = [];

  // adds a token of type `type` if the current chunk starts with
  // a `start`-delimited string
  const delimited = (type, start, endRx) => {
    if (chunk[0] !== start || chunk[1] === start) {
      return false;
    }
    // `search` returns -1 when there is no closing delimiter, making `end` 0.
    const end = 1 + chunk.slice(1).search(endRx);
    if (!end) {
      return false;
    }
    tokens.push(createToken(type, chunk.slice(1, end)));
    i += end + 1;
    return true;
  };

  const emoji = (type, emojiNames) => {
    const match = emojiRx.exec(chunk);
    if (!match) {
      return false;
    }
    // if a whitelist of emoji names is given, only accept emoji from that
    // list.
    const emojiName = emojiNames ? findEmoji(emojiNames, match[1]) : match[1];
    if (!emojiName) {
      return false;
    }
    tokens.push(createToken(type, emojiName, match[0]));
    i += match[0].length;
    return true;
  };

  const mention = (type, start) => {
    if (chunk[0] !== start) {
      return false;
    }
    const match = mentionRx.exec(chunk.slice(1));
    // With an empty list of names (or an empty name in the list) the pattern
    // matches the empty string; that is not a mention.
    if (!match || match[1].length === 0) {
      return false;
    }
    // Length relative to `chunk`, including the leading `start` character.
    const length = 1 + match[1].length;
    tokens.push(createToken(type, match[1], chunk.slice(0, length)));
    i += length;
    return true;
  };

  const link = (type) => {
    const match = linkRx.exec(chunk);
    if (!match) {
      return false;
    }
    tokens.push(createToken(type, match[0]));
    i += match[0].length;
    return true;
  };

  // eat spaces
  const space = () => {
    // .slice again because `i` changed
    const m = /^\s+/.exec(text.slice(i));
    if (m) {
      tokens.push(createToken(TokenType.Text, m[0]));
      i += m[0].length;
    }
  };

  // Consume leading whitespace up front, so markup at the very start of an
  // indented message is still seen at the start of a chunk.
  space();
  chunk = text.slice(i);

  // tokenize text, just loop until it's done!
  while (chunk) {
    const found = emoji(TokenType.Emoji, options.emojiNames)
      || delimited(TokenType.Italic, '_', /_(\W|$)/)
      || delimited(TokenType.Bold, '*', /\*(\W|$)/)
      || delimited(TokenType.Code, '`', /`(\W|$)/)
      || delimited(TokenType.Strike, '~', /~(\W|$)/)
      || mention(TokenType.Mention, '@')
      || link(TokenType.Link);

    if (!found) {
      let end = chunk.indexOf(' ', 1) + /* eat space */ 1;
      if (end === 0) { // no match, = -1 + 1
        end = chunk.length;
      }
      const word = chunk.slice(0, end);
      const previous = tokens[tokens.length - 1];
      // append to previous token if it was also text
      if (previous && previous.type === TokenType.Text) {
        previous.text += word;
        previous.raw += word;
      } else {
        tokens.push(createToken(TokenType.Text, word));
      }
      i += end;
    }

    space();
    chunk = text.slice(i);
  }

  return tokens;
}

/**
 * Adds `http://` to a string if it does not have a URL scheme yet.
 *
 * The scheme check is case-insensitive, matching how links are recognised.
 *
 * @param {string} text
 * @return {string}
 */
function httpify(text) {
  if (!/^[a-z]+:/i.test(text)) {
    return `http://${text}`;
  }
  return text;
}

/**
 * Parses a chat message into a tree-ish structure.
 *
 * Plain text becomes strings; markup becomes objects with a `type` property.
 * The contents of italic, bold and strike nodes are parsed recursively; code
 * contents are kept verbatim.
 *
 * @param {string} message The chat message.
 * @param {Object} [options]
 * @param {Array.<string>} [options.mentions] Names that can be @-mentioned.
 * @param {Array.<string>} [options.emojiNames] If given, only these emoji
 *    names are recognised.
 * @return {Array.<(string|Object)>} The parsed nodes.
 * @throws {TypeError} If `message` is not a string.
 */
function parse(message, options = {}) {
  if (typeof message !== 'string') {
    throw new TypeError('Expected a string');
  }

  return tokenize(message, options).map((token) => {
    switch (token.type) {
      case TokenType.Italic:
        return { type: 'italic', content: parse(token.text, options) };
      case TokenType.Bold:
        return { type: 'bold', content: parse(token.text, options) };
      case TokenType.Code:
        return { type: 'code', content: [token.text] };
      case TokenType.Strike:
        return { type: 'strike', content: parse(token.text, options) };
      case TokenType.Emoji:
        return { type: 'emoji', name: token.text };
      case TokenType.Mention:
        return { type: 'mention', mention: token.text.toLowerCase(), raw: token.text };
      case TokenType.Link:
        return { type: 'link', text: token.text, href: httpify(token.text) };
      case TokenType.Text:
        return token.text;
      /* c8 ignore next 2 */
      default:
        throw new Error('unreachable');
    }
  });
}

export { parse as default };