u-wave-parse-chat-markup
Version:
Abstract chat parser for üWave client applications.
205 lines (202 loc) • 7.38 kB
JavaScript
/**
 * Build a RegExp that finds URLs anywhere inside a string.
 *
 * The pattern is adapted from https://github.com/kevva/url-regex. A match has
 * to begin with either a `scheme://` prefix or a literal `www.`, optionally
 * followed by credentials, then `localhost` or a dotted host name ending in a
 * top-level domain, an optional port and an optional path/query/fragment.
 *
 * @return {RegExp} A new RegExp instance with the `g` and `i` flags set.
 */
function urlRegex() {
  var scheme = '(?:[a-z]+://)';
  var credentials = '(?:\\S+(?::\\S*)?@)?';
  var firstLabel = '(?:(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)';
  var moreLabels = '(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*';
  var topLevel = '(?:\\.(?:[a-z\\u00a1-\\uffff]{2,}))\\.?';
  var portNumber = '(?::\\d{2,5})?';
  var trailer = '(?:[/?#][^\\s"]*)?';
  // Assemble the fragments in reading order: prefix, credentials, host, port, path.
  var pattern = [
    '(?:', scheme, '|www\\.)',
    credentials,
    '(?:localhost|', firstLabel, moreLabels, topLevel, ')',
    portNumber,
    trailer
  ].join('');
  return new RegExp(pattern, 'ig');
}
/**
 * Anchored, case-insensitive variant of the URL pattern: it only matches when
 * the URL sits at the very beginning of the input. It is built without the `g`
 * flag, so repeated `exec` calls do not carry `lastIndex` state.
 */
var linkRx = new RegExp('^' + urlRegex().source, 'i');
/**
 * Memoize a single-argument function.
 *
 * Remembers only the most recent (input, output) pair and uses `===` to decide
 * whether the argument changed. An explicit "has a cached value" flag is kept so
 * that a first call with `undefined` still invokes `fn` instead of being
 * mistaken for a cache hit against the uninitialised slot.
 *
 * @param {function(*): *} fn The function to wrap.
 * @return {function(*): *} A function that returns the cached result when called
 *     again with the same argument as the previous successful call.
 */
function memoize(fn) {
  var hasCached = false;
  var lastArg;
  var lastReturn;
  return function (arg) {
    if (!hasCached || arg !== lastArg) {
      // Run `fn` before touching the cache: if it throws, the previous
      // (input, output) pair stays intact and consistent.
      var result = fn(arg);
      lastArg = arg;
      lastReturn = result;
      hasCached = true;
    }
    return lastReturn;
  };
}
/**
 * Turn `str` into a regular expression snippet that matches `str` literally.
 *
 * Every character with a special meaning in a pattern is prefixed with a
 * backslash; all other characters are copied through unchanged.
 *
 * @param {string} str The text to escape.
 * @return {string} The escaped pattern source.
 */
function escapeStringRegExp(str) {
  var prefixWithBackslash = function (special) {
    return '\\' + special;
  };
  return str.replace(/[|\\{}()[\]^$+*?.]/g, prefixWithBackslash);
}
/**
 * Create a token record.
 *
 * @param {number} type Numeric token type.
 * @param {string} text The token's content.
 * @param {string} [raw] The source text the token was read from. Falls back to
 *     `text` when omitted or `undefined`.
 * @return {{type: number, text: string, raw: string}}
 */
function createToken(type, text, raw) {
  var source = raw === undefined ? text : raw;
  return { type: type, text: text, raw: source };
}
/**
 * Return a copy of the mentionable names ordered from longest to shortest.
 * The input array is left untouched. The result is memoized on the identity of
 * the input array.
 */
var sortMentions = memoize(function (mentions) {
  var longestFirst = function (left, right) {
    return right.length - left.length;
  };
  var ordered = mentions.slice();
  ordered.sort(longestFirst);
  return ordered;
});
/**
 * Build the case-insensitive RegExp that recognises one of the given names at
 * the start of the input. The name is captured in group 1 and must be followed
 * by a word boundary, whitespace, a non-word character or the end of input.
 * The result is memoized on the identity of the input array.
 *
 * When there is nothing to match (an empty list, or only empty names) the
 * returned pattern never matches. Without this, the alternation would collapse
 * to `^()`, which matches the empty string and reports an empty mention.
 */
var makeMentionRegExp = memoize(function (mentions) {
  var alternatives = mentions
    .map(function (mention) { return escapeStringRegExp(mention); })
    .filter(function (escaped) { return escaped !== ''; });
  if (alternatives.length === 0) {
    // An empty negative lookahead fails at every position.
    return new RegExp('(?!)');
  }
  return new RegExp('^(' + alternatives.join('|') + ')(?:\\b|\\s|\\W|$)', 'i');
});
/**
 * Look up an emoji name without regard to letter case.
 *
 * @param {Array.<string>} names All possible emoji names.
 * @param {string} match The name as typed by the user.
 * @return {string|null} The first entry of `names` that equals `match` when
 *     both are lower-cased (returned with its original casing), or `null` if
 *     there is no such entry.
 */
function findEmoji(names, match) {
  var wanted = match.toLowerCase();
  var index = 0;
  while (index < names.length) {
    var candidate = names[index];
    if (candidate.toLowerCase() === wanted) {
      return candidate;
    }
    index += 1;
  }
  return null;
}
/**
 * Split `text` into a flat list of tokens.
 *
 * Token types are numeric: 0 italic, 1 bold, 2 code, 3 strike, 4 emoji,
 * 5 mention, 6 link, 7 text. At each position the recognisers are tried in
 * that order (text is the fallback), and whitespace following a token is
 * emitted as its own text token.
 *
 * @param {string} text The message to tokenize.
 * @param {Object} options
 * @param {Array.<string>} [options.mentions] Names that can be mentioned as `@name`.
 * @param {Array.<string>} [options.emojiNames] When given, only emoji whose name
 *     is in this list (compared case-insensitively) are recognised.
 * @return {Array.<{type: number, text: string, raw: string}>} Tokens in input order.
 */
function tokenize(text, options) {
  var chunk;
  // absolute offset into `text`; `chunk` is always `text.slice(i)`
  var i = 0;
  var mentions = sortMentions(options.mentions || []);
  var mentionRx = makeMentionRegExp(mentions);
  var tokens = [];
  // adds a token of type `type` if the current chunk starts with a single
  // `start` character and a closing delimiter matching `endRx` follows
  var delimited = function (type, start, endRx) {
    // a doubled delimiter (e.g. `__`) does not open a span
    if (chunk[0] === start && chunk[1] !== start) {
      // `search` yields -1 when nothing closes the span, which makes `end` 0
      var end = 1 + chunk.slice(1).search(endRx);
      if (end) {
        tokens.push(createToken(type, chunk.slice(1, end)));
        i += end + 1;
        return true;
      }
    }
    return false;
  };
  var emoji = function (type, emojiNames) {
    // The hyphen is escaped so the class lists `+`, `-` and `~` literally.
    // Unescaped, `+-~` is a range that also contains `:`, which made adjacent
    // emoji such as `:a::b:` read as a single name.
    var match = /^:([A-Za-z0-9_+\-~]+):/.exec(chunk);
    if (match) {
      // if a whitelist of emoji names is given, only accept emoji from that
      // list.
      var emojiName = emojiNames ? findEmoji(emojiNames, match[1]) : match[1];
      if (emojiName) {
        tokens.push(createToken(type, emojiName, match[0]));
        i += match[0].length;
        return true;
      }
    }
    return false;
  };
  var mention = function (type, start) {
    if (chunk[0] === start) {
      var maybeMentionable = chunk.slice(1);
      var match = mentionRx.exec(maybeMentionable);
      if (match) {
        // The raw text is measured relative to `chunk`; using the absolute
        // offset `i` here would overshoot for mentions past the first character.
        var rawLength = 1 + match[1].length;
        tokens.push(createToken(type, match[1], chunk.slice(0, rawLength)));
        i += rawLength;
        return true;
      }
    }
    return false;
  };
  var link = function (type) {
    var match = linkRx.exec(chunk);
    if (match) {
      tokens.push(createToken(type, chunk.slice(0, match[0].length)));
      i += match[0].length;
      return true;
    }
    return false;
  };
  // eat spaces
  var space = function () {
    // .slice again because `i` changed
    var m = /^\s+/.exec(text.slice(i));
    if (m) {
      tokens.push(createToken(7 /* TokenType.Text */, m[0]));
      i += m[0].length;
    }
  };
  // tokenize text, just loop until it's done!
  chunk = text;
  while (chunk) {
    var found = emoji(4 /* TokenType.Emoji */, options.emojiNames)
      || delimited(0 /* TokenType.Italic */, '_', /_(\W|$)/)
      || delimited(1 /* TokenType.Bold */, '*', /\*(\W|$)/)
      || delimited(2 /* TokenType.Code */, '`', /`(\W|$)/)
      || delimited(3 /* TokenType.Strike */, '~', /~(\W|$)/)
      || mention(5 /* TokenType.Mention */, '@')
      || link(6 /* TokenType.Link */);
    if (!found) {
      // plain text runs up to and including the next space; the search starts
      // at index 1 so at least one character is always consumed
      var end = chunk.indexOf(' ', 1) + /* eat space */ 1;
      if (end === 0) { // no match, = -1 + 1
        end = chunk.length;
      }
      var piece = chunk.slice(0, end);
      var previous = tokens[tokens.length - 1];
      // append to previous token if it was also text, keeping `raw` in step
      // with `text`
      if (previous && previous.type === 7 /* TokenType.Text */) {
        previous.text += piece;
        previous.raw += piece;
      }
      else {
        tokens.push(createToken(7 /* TokenType.Text */, piece));
      }
      i += end;
    }
    space();
    chunk = text.slice(i);
  }
  return tokens;
}
/**
 * Adds `http://` to a string if it does not have a URL scheme yet.
 *
 * The scheme check ignores letter case, in line with the case-insensitive link
 * detection in this file, so `HTTPS://example.com` is returned unchanged
 * instead of being turned into `http://HTTPS://example.com`.
 *
 * @param {string} text The link text.
 * @return {string} `text` itself when it starts with `letters:`, otherwise
 *     `text` prefixed with `http://`.
 */
function httpify(text) {
  if (/^[a-z]+:/i.test(text)) {
    return text;
  }
  return 'http://'.concat(text);
}
/**
 * Parse a chat message into a list of nodes.
 *
 * Plain text becomes a string. Every other token becomes an object with a
 * `type` of 'italic', 'bold', 'code', 'strike', 'emoji', 'mention' or 'link'.
 * Italic, bold and strike nodes hold the recursively parsed inner text in
 * `content`; code nodes hold the inner text verbatim.
 *
 * @param {string} message The message text.
 * @param {Object} [options] Passed on to the tokenizer; defaults to `{}`.
 * @return {Array.<string|Object>} The parsed nodes in message order.
 * @throws {TypeError} If `message` is not a string.
 */
function parse(message, options) {
  var settings = options === undefined ? {} : options;
  if (typeof message !== 'string') {
    throw new TypeError('Expected a string');
  }
  // wraps the recursively parsed inner text of a formatting token
  var nested = function (kind, token) {
    return { type: kind, content: parse(token.text, settings) };
  };
  var toNode = function (token) {
    var kind = token.type;
    if (kind === 0 /* TokenType.Italic */) {
      return nested('italic', token);
    }
    if (kind === 1 /* TokenType.Bold */) {
      return nested('bold', token);
    }
    if (kind === 2 /* TokenType.Code */) {
      return { type: 'code', content: [token.text] };
    }
    if (kind === 3 /* TokenType.Strike */) {
      return nested('strike', token);
    }
    if (kind === 4 /* TokenType.Emoji */) {
      return { type: 'emoji', name: token.text };
    }
    if (kind === 5 /* TokenType.Mention */) {
      return { type: 'mention', mention: token.text.toLowerCase(), raw: token.text };
    }
    if (kind === 6 /* TokenType.Link */) {
      return { type: 'link', text: token.text, href: httpify(token.text) };
    }
    if (kind === 7 /* TokenType.Text */) {
      return token.text;
    }
    /* c8 ignore next */
    throw new Error('unreachable');
  };
  return tokenize(message, settings).map(function (token) {
    return toNode(token);
  });
}
export { parse as default };