gitter-markdown-processor
Version:
parses gitter chat messages, but in its own process
388 lines (339 loc) • 9.27 kB
JavaScript
/* eslint-disable func-names */
;
const crypto = require('crypto');
const marked = require('gitter-marked');
const highlight = require('highlight.js');
const _ = require('underscore');
const util = require('util');
const katex = require('katex');
const htmlencode = require('htmlencode');
const urlJoin = require('url-join');
const matcher = require('./decoration-url-matcher');
// Markdown options shared by the module-level lexer created below and by the
// parser that processChat builds for every message.
// NOTE(review): `linkify` and `skipComments` are not stock marked options —
// presumably gitter-marked extensions; confirm against that package.
const options = {
gfm: true,
tables: true,
sanitize: true,
breaks: true,
linkify: true,
skipComments: true,
};
// One lexer instance is reused across calls; processChat clears its token
// state before lexing each message.
const lexer = new marked.Lexer(options);
// Languages handed to highlight.js. The order is kept exactly as it was
// originally written.
// NOTE(review): order may influence auto-detection tie-breaking — confirm
// before sorting or regrouping this list.
const HIGHLIGHT_LANGUAGES = [
  'apache', 'applescript', 'css', 'bash', 'clojure-repl', 'clojure',
  'javascript', 'coffeescript', 'cpp', 'cs', 'd', 'dart',
  'delphi', 'diff', 'django', 'dockerfile', 'dos', 'elixir',
  'erb', 'erlang-repl', 'erlang', 'fortran', 'fsharp', 'gcode',
  'gherkin', 'go', 'gradle', 'groovy', 'haml', 'handlebars',
  'haskell', 'http', 'ini', 'java', 'json', 'kotlin',
  'less', 'lisp', 'livescript', 'lua', 'makefile', 'markdown',
  'mathematica', 'matlab', 'nginx', 'objectivec', 'perl', 'php',
  'powershell', 'prolog', 'puppet', 'python', 'q', 'r',
  'rib', 'rsl', 'ruby', 'rust', 'scala', 'scheme',
  'scilab', 'scss', 'smali', 'smalltalk', 'sml', 'sql',
  'stylus', 'swift', 'tcl', 'tex', 'typescript', 'vbnet',
  'vbscript-html', 'vbscript', 'vim', 'x86asm', 'xml',
];

// Configure highlight.js once at module load: empty CSS class prefix and the
// language list above.
highlight.configure({
  classPrefix: '',
  languages: HIGHLIGHT_LANGUAGES,
});
/**
 * Checks whether `string` begins with `substring` once surrounding whitespace
 * has been trimmed and the string lower-cased.
 * `substring` is compared as given, so callers pass it in lower case.
 */
const startsWith = (string, substring) => {
  const normalized = string.trim().toLowerCase();
  const position = normalized.indexOf(substring);
  return position === 0;
};
/**
 * Swaps any URL that begins with a `javascript:` or `data:` scheme for a fixed
 * harmless URL; every other URL is returned untouched.
 */
const replaceScriptUrls = urlString => {
  // eslint-disable-next-line no-script-url
  const scriptSchemes = ['javascript:', 'data:'];
  const isScriptUrl = scriptSchemes.some(scheme => startsWith(urlString, scheme));
  /* Rickroll the script kiddies */
  return isScriptUrl ? 'https://goo.gl/7NDM3x' : urlString;
};
/**
 * Returns the URL unchanged when it already starts with `http://`, `https://`
 * or a protocol-relative `//`; otherwise returns it with `http://` in front.
 */
const prependHttp = urlString => {
  const acceptedPrefixes = ['http://', 'https://', '//'];
  const hasAcceptedPrefix = acceptedPrefixes.some(prefix => startsWith(urlString, prefix));
  return hasAcceptedPrefix ? urlString : `http://${urlString}`;
};
/**
 * Returns the punycode (ASCII) form of a URL whose hostname contains an
 * internationalised label, so that look-alike unicode domains are visible.
 *
 * Every dot-separated hostname label is inspected, not just the first one, so
 * hosts such as `www.<idn>.com` or an IDN TLD behind an ASCII label are also
 * normalised.
 *
 * @param {string} urlString - URL to inspect.
 * @returns {string} `URL#href` when any hostname label is punycode-encoded,
 *   otherwise the input string unchanged.
 */
const normalizeIdn = urlString => {
  let parsedUrl;
  try {
    parsedUrl = new URL(urlString);
  } catch (err) {
    // If the URL was malformed and could not be parsed, then we don't have to
    // worry about it being an IDN link anyway because it won't work at all
    return urlString;
  }
  // The URL parser has already converted unicode labels to their `xn--` form.
  const hasPunycodeLabel = parsedUrl.hostname
    .split('.')
    .some(label => label.startsWith('xn--'));
  return hasPunycodeLabel ? parsedUrl.href : urlString;
};
const RTLO = '\u202E';
const ENCODED_RTLO = '%E2%80%AE';
/**
 * Replaces every right-to-left override character (U+202E) in the string with
 * its percent-encoded form.
 *
 * split/join is used because `String.prototype.replace` with a string pattern
 * only replaces the first occurrence, which would let later RTLO characters
 * through.
 *
 * @param {string} urlString - URL or link text to clean.
 * @returns {string} the input with all RTLO characters encoded.
 */
const replaceRtlo = urlString => urlString.split(RTLO).join(ENCODED_RTLO);
/**
 * Runs a URL through each sanitising step in turn: script-scheme replacement,
 * RTLO encoding, http prefixing and IDN normalisation.
 * A falsy input yields an empty string.
 */
function escapeDangerousUrl(urlString) {
  if (!urlString) {
    return '';
  }
  const withoutScripts = replaceScriptUrls(urlString);
  const withoutRtlo = replaceRtlo(withoutScripts);
  const withProtocol = prependHttp(withoutRtlo);
  return normalizeIdn(withProtocol);
}
/**
 * Builds the camo proxy URL for `url`: the camo base URL joined with the
 * hex HMAC-SHA1 of the URL (keyed by the camo secret) and the hex-encoded URL.
 * When either the camo URL or the secret is missing, `url` is returned as is.
 */
function generateProxyUrl(camoUrl, camoSecret, url) {
  const isCamoConfigured = Boolean(camoUrl && camoSecret);
  if (!isCamoConfigured) {
    return url;
  }

  const hmac = crypto.createHmac('sha1', camoSecret);
  hmac.update(url);
  const signature = hmac.digest('hex');

  const hexUrl = Buffer.from(url, 'utf8').toString('hex');
  return urlJoin(camoUrl, signature, hexUrl);
}
/**
 * Builds a marked renderer that emits the chat HTML and, as a side effect,
 * records what it renders on `renderContext`.
 *
 * @param {Object} renderContext - Mutable accumulator. The renderer pushes to
 *   its `urls`, `mentions`, `issues` and `plainText` arrays and increments its
 *   `paragraphCount`.
 * @param {Object} camoConfig
 * @param {string} [camoConfig.camoUrl] - Base URL of the camo image proxy.
 * @param {string} [camoConfig.camoSecret] - Secret used to sign proxied URLs.
 *   Images are only proxied when both values are set.
 * @returns {Object} the configured `marked.Renderer` instance.
 */
function getRenderer(renderContext, { camoUrl, camoSecret }) {
  const renderer = new marked.Renderer();

  // Highlight code blocks
  renderer.code = function(code, lang) {
    const language = String(lang).toLowerCase();

    // `text` blocks are HTML-encoded but never highlighted
    if (language === 'text') {
      return util.format('<pre><code class="text">%s</code></pre>', htmlencode.htmlEncode(code));
    }

    if (highlight.getLanguage(language)) {
      return util.format(
        '<pre><code class="%s">%s</code></pre>',
        language,
        highlight.highlight(language, code).value,
      );
    }

    // Unknown or missing language: let highlight.js guess
    return util.format('<pre><code>%s</code></pre>', highlight.highlightAuto(code).value);
  };

  // Render LaTeX with KaTeX; on failure show the encoded error and source
  renderer.latex = function(latexCode) {
    try {
      return katex.renderToString(latexCode);
    } catch (e) {
      return util.format(
        '<pre><code>%s: %s</code></pre>',
        htmlencode.htmlEncode(e.message),
        htmlencode.htmlEncode(latexCode),
      );
    }
  };

  // Count paragraphs so the caller can tell single-paragraph messages apart
  renderer.paragraph = function(text) {
    renderContext.paragraphCount++;
    return util.format('<p>%s</p>', text);
  };

  // Shared implementation behind the issue / mr / pr renderers
  renderer.issuable = function(type, provider, repo, issue, href, text) {
    renderContext.issues.push({
      number: issue,
      repo: repo || undefined,
    });

    let out = `<a target="_blank" data-link-type="${type}" data-issue="${issue}"`;
    if (href) {
      out += util.format(' href="%s"', href);
    }
    if (provider) {
      out += util.format(' data-provider="%s"', provider);
    }
    if (repo) {
      out += util.format(' data-issue-repo="%s"', repo);
    }
    out += ` class="${type}">${text}</a>`;
    return out;
  };

  renderer.issue = function(provider, repo, issue, href, text) {
    return renderer.issuable('issue', provider, repo, issue, href, text);
  };

  renderer.mr = function(provider, repo, issue, href, text) {
    return renderer.issuable('mr', provider, repo, issue, href, text);
  };

  renderer.pr = function(provider, repo, issue, href, text) {
    return renderer.issuable('pr', provider, repo, issue, href, text);
  };

  renderer.commit = function(provider, repo, sha, href /* , text */) {
    // Link text is always `repo@<first 7 chars of sha>`
    const text = `${repo}@${sha.substring(0, 7)}`;

    if (!href) {
      // No explicit link: build one, defaulting to GitHub unless the provider is GitLab
      let baseUrl = 'https://github.com/';
      if (provider === 'gitlab') {
        baseUrl = 'https://gitlab.com/';
      }
      href = `${baseUrl + repo}/commit/${sha}`;
    }

    const out =
      `<a href="${href}" target="_blank" ` +
      `data-link-type="commit" ` +
      `data-provider="${provider}" ` +
      `data-commit-sha="${sha}" ` +
      `data-commit-repo="${repo}" ` +
      `class="commit">${text}</a>`;
    return out;
  };

  renderer.link = (href, title, text) => {
    const validatedHref = escapeDangerousUrl(href);
    const urlData = matcher(href);
    // Flag links whose target was rewritten by sanitisation
    const showTooltip = validatedHref !== href ? 'link-tooltip' : '';

    // Recognised issue/commit style URLs are delegated to their own renderer
    if (urlData) {
      return renderer[urlData.type](
        urlData.provider,
        urlData.repo,
        urlData.id,
        urlData.href,
        urlData.text,
      );
    }

    renderContext.urls.push({ url: validatedHref });
    return util.format(
      '<a href="%s" rel="nofollow noopener noreferrer" target="_blank" class="link %s">%s</a>',
      validatedHref,
      showTooltip,
      replaceRtlo(text),
    );
  };

  renderer.image = function(href, title, text) {
    href = escapeDangerousUrl(href);
    href = generateProxyUrl(camoUrl, camoSecret, href);
    renderContext.urls.push({ url: href });

    if (title) {
      return util.format('<img src="%s" title="%s" alt="%s" rel="nofollow">', href, title, text);
    }
    return util.format('<img src="%s" alt="%s" rel="nofollow">', href, text);
  };

  renderer.mention = function(href, title, text) {
    // Strip a leading `@` to get the bare screen name
    const screenName = text.charAt(0) === '@' ? text.substring(1) : text;
    renderContext.mentions.push({ screenName });
    return util.format(
      '<span data-link-type="mention" data-screen-name="%s" class="mention">%s</span>',
      screenName,
      text,
    );
  };

  renderer.groupmention = function(name, text) {
    renderContext.mentions.push({ screenName: name, group: true });
    return util.format(
      '<span data-link-type="groupmention" data-group-name="%s" class="groupmention">%s</span>',
      name,
      text,
    );
  };

  renderer.email = function(href, title, text) {
    // The sanitised value must actually be used; previously the result of the
    // sanitising call was discarded. Only the scheme and RTLO sanitisers are
    // applied: the full escapeDangerousUrl pipeline also runs prependHttp,
    // which would put `http://` in front of any non-http href (e.g. a
    // `mailto:` link).
    const safeHref = href ? replaceRtlo(replaceScriptUrls(href)) : href;
    renderContext.urls.push({ url: safeHref });
    return util.format('<a href="%s" rel="nofollow">%s</a>', safeHref, text);
  };

  renderer.heading = function(text, level /* , raw */) {
    return `<h${level}>${text}</h${level}>\n`;
  };

  renderer.text = function(text) {
    /* Used for language detection */
    renderContext.plainText.push(text);
    return text;
  };

  return renderer;
}
module.exports = exports = function processChat(text, { camoUrl, camoSecret } = {}) {
const renderContext = {
urls: [],
mentions: [],
issues: [],
plainText: [],
paragraphCount: 0,
};
let html = '';
if (text) {
text = `${text}`; // Force to string
const renderer = getRenderer(renderContext, { camoUrl, camoSecret });
// Reset any references, see https://github.com/gitterHQ/gitter/issues/1041
lexer.tokens = [];
lexer.tokens.links = {};
const tokens = lexer.lex(text);
const parser = new marked.Parser(_.extend({ renderer }, options));
html = parser.parse(tokens);
if (renderContext.paragraphCount === 1) {
html = html.replace(/<\/?p>/g, '');
}
} else {
text = '';
}
return {
text,
html,
urls: renderContext.urls,
mentions: renderContext.mentions,
issues: renderContext.issues,
plainText: renderContext.plainText.join(' '),
};
};