matrix-react-sdk
Version:
SDK for matrix.org using React
542 lines (522 loc) • 72.4 kB
JavaScript
"use strict";
var _interopRequireDefault = require("@babel/runtime/helpers/interopRequireDefault");
Object.defineProperty(exports, "__esModule", {
value: true
});
exports.EMOJI_REGEX = void 0;
Object.defineProperty(exports, "Linkify", {
enumerable: true,
get: function () {
return _Linkify.Linkify;
}
});
exports.bodyToDiv = bodyToDiv;
exports.bodyToHtml = bodyToHtml;
exports.bodyToSpan = bodyToSpan;
exports.checkBlockNode = checkBlockNode;
exports.formatEmojis = formatEmojis;
exports.getHtmlText = getHtmlText;
exports.isUrlPermitted = isUrlPermitted;
Object.defineProperty(exports, "linkifyAndSanitizeHtml", {
enumerable: true,
get: function () {
return _Linkify.linkifyAndSanitizeHtml;
}
});
Object.defineProperty(exports, "linkifyElement", {
enumerable: true,
get: function () {
return _Linkify.linkifyElement;
}
});
exports.sanitizedHtmlNode = sanitizedHtmlNode;
exports.topicToHtml = topicToHtml;
exports.unicodeToShortcode = unicodeToShortcode;
var _defineProperty2 = _interopRequireDefault(require("@babel/runtime/helpers/defineProperty"));
var _react = _interopRequireDefault(require("react"));
var _sanitizeHtml = _interopRequireDefault(require("sanitize-html"));
var _classnames = _interopRequireDefault(require("classnames"));
var _katex = _interopRequireDefault(require("katex"));
var _htmlEntities = require("html-entities");
var _escapeHtml = _interopRequireDefault(require("escape-html"));
var _emojibaseBindings = require("@matrix-org/emojibase-bindings");
var _SettingsStore = _interopRequireDefault(require("./settings/SettingsStore"));
var _Reply = require("./utils/Reply");
var _UrlUtils = require("./utils/UrlUtils");
var _Linkify = require("./Linkify");
var _strings = require("./utils/strings");
/**
 * Collects the own property keys of an object: its enumerable string keys
 * followed by its symbol keys.
 *
 * @param {Object} e The object to inspect
 * @param {boolean} [r] When truthy, symbol keys whose property is not enumerable are left out
 * @returns {Array<string|symbol>} The collected keys
 */
function ownKeys(e, r) {
    const keys = Object.keys(e);
    if (Object.getOwnPropertySymbols) {
        let symbols = Object.getOwnPropertySymbols(e);
        if (r) {
            symbols = symbols.filter(symbol => Object.getOwnPropertyDescriptor(e, symbol).enumerable);
        }
        keys.push(...symbols);
    }
    return keys;
}
// Compiler-emitted object-spread helper: copies the properties of every
// argument after the first onto `e` and returns `e`.
//  - null/undefined arguments are treated as empty objects.
//  - Odd-positioned arguments (1, 3, ...) have their enumerable own keys
//    (strings and symbols, via ownKeys) copied by value using the
//    _defineProperty2 helper.
//  - Even-positioned arguments (2, 4, ...) have their full property descriptors
//    copied, using Object.getOwnPropertyDescriptors when it exists and a
//    per-key Object.defineProperty loop otherwise.
// NOTE: the trailing "/*" on the line below opens the licence comment that
// follows, so it must stay at the end of this line.
function _objectSpread(e) { for (var r = 1; r < arguments.length; r++) { var t = null != arguments[r] ? arguments[r] : {}; r % 2 ? ownKeys(Object(t), !0).forEach(function (r) { (0, _defineProperty2.default)(e, r, t[r]); }) : Object.getOwnPropertyDescriptors ? Object.defineProperties(e, Object.getOwnPropertyDescriptors(t)) : ownKeys(Object(t)).forEach(function (r) { Object.defineProperty(e, r, Object.getOwnPropertyDescriptor(t, r)); }); } return e; } /*
Copyright 2024 New Vector Ltd.
Copyright 2019 Michael Telatynski <7t3chguy@gmail.com>
Copyright 2019 The Matrix.org Foundation C.I.C.
Copyright 2017, 2018 New Vector Ltd
Copyright 2015, 2016 OpenMarket Ltd
SPDX-License-Identifier: AGPL-3.0-only OR GPL-3.0-only
Please see LICENSE files in the repository root for full details.
*/
// Matches a single UTF-16 surrogate pair (high surrogate followed by low
// surrogate). Anything outside the basic multilingual plane will be a
// surrogate pair.
const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/;
// And there are a bunch more symbol characters that emojibase has within the
// BMP, so this includes the ranges from 'letterlike symbols' to
// 'miscellaneous symbols and arrows' (U+2100-U+2BFF) which should catch all
// of them (with plenty of false positives, but that's OK)
const SYMBOL_PATTERN = /([\u2100-\u2bff])/;
// Regex pattern for non-emoji characters that can appear in an "all-emoji" message
// (Zero-Width Space U+200B and any other whitespace matched by \s).
// It carries the global flag because it is used with String.prototype.replace
// to strip every occurrence.
const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g;
// Regex for emoji. It matches any RGI_Emoji sequence, optionally followed by
// an emoji presentation variation selector (U+FE0F), unless the sequence is
// followed by a text presentation variation selector (U+FE0E). Lone regional
// indicators (U+1F1E6-U+1F1FF) are counted as emoji too. Technically this
// produces false negatives for emoji followed by U+FE0E when the emoji has no
// text variant, but in practice this doesn't matter.
const EMOJI_REGEX = exports.EMOJI_REGEX = function buildEmojiRegex() {
    // The pattern is kept as a string and compiled through the RegExp
    // constructor so that platforms without `v` mode fail at runtime (where we
    // can recover) rather than with a syntax error when the file is parsed.
    const pattern = "\\p{RGI_Emoji}(?!\\uFE0E)(?:(?<!\\uFE0F)\\uFE0F)?|[\\u{1f1e6}-\\u{1f1ff}]";
    let compiled;
    try {
        compiled = new RegExp(pattern, "v");
    } catch {
        // v mode not supported; fall back to a regex that matches nothing
        compiled = /(?!)/;
    }
    return compiled;
}();
// Anchored variant of EMOJI_REGEX: matches only strings that consist entirely
// of one or more emoji.
const BIGEMOJI_REGEX = function buildBigEmojiRegex() {
    let compiled;
    try {
        const anchoredPattern = `^(${EMOJI_REGEX.source})+$`;
        compiled = new RegExp(anchoredPattern, "iv");
    } catch {
        // Fall back to matching nothing, just like for EMOJI_REGEX
        compiled = /(?!)/;
    }
    return compiled;
}();
/*
 * Cheap pre-check for whether a string may contain emoji.
 * Tests only for surrogate pairs and for the BMP symbol range, so it gives
 * false positives, but it is a fast way to decide whether a string needs
 * emojification at all.
 * Returns false for empty or missing strings.
 */
function mightContainEmoji(str) {
    if (!str) {
        return false;
    }
    return SURROGATE_PAIR_PATTERN.test(str) || SYMBOL_PATTERN.test(str);
}
/**
 * Looks up the shortcode of an emoji character.
 *
 * @param {String} char The emoji character
 * @return {String} The first known shortcode wrapped in colons (such as :thumbup:),
 *     or an empty string when the lookup yields no shortcodes
 */
function unicodeToShortcode(char) {
    const emoji = (0, _emojibaseBindings.getEmojiFromUnicode)(char);
    const shortcodes = emoji?.shortcodes;
    if (!shortcodes?.length) {
        return "";
    }
    return `:${shortcodes[0]}:`;
}
/*
 * Given an untrusted HTML string, sanitise it with the shared sanitizer
 * options and return a React <div> (dir="auto") whose inner HTML is the
 * sanitised result.
 */
function sanitizedHtmlNode(insaneHtml) {
    const sanitize = _sanitizeHtml.default;
    const props = {
        dangerouslySetInnerHTML: {
            __html: sanitize(insaneHtml, _Linkify.sanitizeHtmlParams)
        },
        dir: "auto"
    };
    return /*#__PURE__*/_react.default.createElement("div", props);
}
/**
 * Runs an untrusted HTML string through the sanitizer with no allowed tags,
 * attributes or schemes, discarding disallowed tags.
 *
 * @param {string} insaneHtml untrusted HTML
 * @returns {string} the sanitizer output
 */
function getHtmlText(insaneHtml) {
    const stripEverything = {
        allowedTags: [],
        allowedAttributes: {},
        selfClosing: [],
        allowedSchemes: [],
        disallowedTagsMode: "discard"
    };
    const sanitize = _sanitizeHtml.default;
    return sanitize(insaneHtml, stripEverything);
}
/**
 * Tests if a URL from an untrusted source may be safely put into the DOM.
 * The biggest threat here is javascript: URIs.
 * Note that the HTML sanitiser library has its own internal logic for
 * doing this, to which we pass the same list of schemes. This function is for
 * the other places where URLs need sanitising.
 *
 * @param {string} inputUrl the URL to check
 * @return true if the URL parses and its scheme is permitted, otherwise false
 */
function isUrlPermitted(inputUrl) {
    try {
        const { protocol } = new URL(inputUrl);
        // The parsed protocol includes the trailing colon; drop it to get the bare scheme
        const scheme = protocol.slice(0, -1);
        return _UrlUtils.PERMITTED_URL_SCHEMES.includes(scheme);
    } catch {
        // Unparseable URLs are never permitted
        return false;
    }
}
// Sanitizer options for quoting into the composer: identical to the shared
// sanitizeHtmlParams except that only the "code" and "*" tag transforms are
// kept, so less rewriting is done.
const composerSanitizeHtmlParams = {
    ..._Linkify.sanitizeHtmlParams,
    transformTags: {
        "code": _Linkify.transformTags["code"],
        "*": _Linkify.transformTags["*"]
    }
};
// Sanitizer options for room topics: the shared sanitizeHtmlParams with a
// reduced set of allowed tags, to avoid turning topics into Myspace
const topicSanitizeHtmlParams = {
    ..._Linkify.sanitizeHtmlParams,
    allowedTags: [
        "font", // custom to matrix for IRC-style font coloring
        "del", // for markdown
        "s",
        "a",
        "sup",
        "sub",
        "b",
        "i",
        "u",
        "strong",
        "em",
        "strike",
        "br",
        "div",
        "span"
    ]
};
/**
 * Base class for search-term highlighters. Subclasses must provide
 * `processSnippet(snippet, highlight)`, which turns one piece of text into a
 * result node (highlighted when `highlight` is true).
 */
class BaseHighlighter {
    /**
     * @param {string} highlightClass class name stored for subclasses to apply to highlighted text
     * @param {string} [highlightLink] optional link stored for subclasses to wrap highlighted text in
     */
    constructor(highlightClass, highlightLink) {
        this.highlightClass = highlightClass;
        this.highlightLink = highlightLink;
    }

    /**
     * Apply the highlights to a section of text
     *
     * @param {string} safeSnippet The snippet of text to apply the highlights
     *     to. This input must be sanitised as it will be treated as HTML.
     * @param {string[]} safeHighlights A list of substrings to highlight,
     *     sorted by descending length. Empty entries are ignored.
     *
     * @returns a list of results, one per piece of the snippet, as produced by
     *     the subclass's processSnippet. Matching is case-insensitive and the
     *     highlighted pieces keep the snippet's original casing.
     */
    applyHighlights(safeSnippet, safeHighlights) {
        // An empty highlight matches at every offset without ever advancing,
        // which would make the search loop below spin forever, so drop them.
        const highlights = safeHighlights.filter(Boolean);
        if (highlights.length === 0) {
            // Nothing to look for: hand the snippet back unhighlighted.
            return safeSnippet ? [this.processSnippet(safeSnippet, false)] : [];
        }

        const safeHighlight = highlights[0];
        // Lower-case once, outside the loop, in a way that keeps every offset
        // valid for the original strings.
        const haystack = this.lowerCaseKeepingOffsets(safeSnippet);
        const needle = this.lowerCaseKeepingOffsets(safeHighlight);

        let lastOffset = 0;
        let offset;
        let nodes = [];
        while ((offset = haystack.indexOf(needle, lastOffset)) >= 0) {
            // handle preamble
            if (offset > lastOffset) {
                const subSnippet = safeSnippet.substring(lastOffset, offset);
                nodes = nodes.concat(this.applySubHighlights(subSnippet, highlights));
            }

            // do highlight. use the original string rather than safeHighlight
            // to preserve the original casing.
            const endOffset = offset + safeHighlight.length;
            nodes.push(this.processSnippet(safeSnippet.substring(offset, endOffset), true));
            lastOffset = endOffset;
        }

        // handle postamble
        if (lastOffset !== safeSnippet.length) {
            const subSnippet = safeSnippet.substring(lastOffset);
            nodes = nodes.concat(this.applySubHighlights(subSnippet, highlights));
        }
        return nodes;
    }

    /**
     * Processes a stretch of text that did not match the first highlight:
     * tries the remaining highlights on it, or returns it unhighlighted when
     * there are none left.
     *
     * @param {string} safeSnippet the unmatched stretch of text
     * @param {string[]} safeHighlights the highlight list whose first entry was already tried
     * @returns a list of results
     */
    applySubHighlights(safeSnippet, safeHighlights) {
        if (safeHighlights.length > 1) {
            // recurse into this range to check for the next set of highlight matches
            return this.applyHighlights(safeSnippet, safeHighlights.slice(1));
        }
        // no more highlights to be found, just return the unhighlighted string
        return [this.processSnippet(safeSnippet, false)];
    }

    /**
     * Lower-cases a string without changing its length, so that offsets found
     * in the result can be used on the original.
     *
     * String.prototype.toLowerCase can lengthen a string (e.g. U+0130 becomes
     * two code units). When that happens, the string is lower-cased one code
     * point at a time and any code point whose lower-case form has a different
     * length is kept as it is.
     *
     * @param {string} text the string to lower-case
     * @returns {string} a string with the same length as `text`
     */
    lowerCaseKeepingOffsets(text) {
        const lowered = text.toLowerCase();
        if (lowered.length === text.length) {
            return lowered;
        }
        return Array.from(text, codePoint => {
            const loweredCodePoint = codePoint.toLowerCase();
            return loweredCodePoint.length === codePoint.length ? loweredCodePoint : codePoint;
        }).join("");
    }
}
/* Highlighter that produces HTML strings. */
class HtmlHighlighter extends BaseHighlighter {
    /* Turn a snippet into an HTML string, highlighting it if required.
     *
     * snippet: content of the span; must have been sanitised
     * highlight: true to highlight as a search match
     *
     * Returns the snippet untouched when highlight is false. Otherwise returns
     * it wrapped in a <span> carrying the highlight class, which is in turn
     * wrapped in an <a> pointing at the (URI-encoded) highlight link when one
     * was given.
     */
    processSnippet(snippet, highlight) {
        if (!highlight) {
            return snippet;
        }
        const highlighted = `<span class="${this.highlightClass}">${snippet}</span>`;
        if (!this.highlightLink) {
            return highlighted;
        }
        return `<a href="${encodeURI(this.highlightLink)}">${highlighted}</a>`;
    }
}
// Wraps an emoji in an HTML <span class='mx_Emoji'> string whose title is the emoji's shortcode.
const emojiToHtmlSpan = emoji => {
    const shortcode = unicodeToShortcode(emoji);
    return `<span class='mx_Emoji' title='${shortcode}'>${emoji}</span>`;
};
// Wraps an emoji in a React <span className="mx_Emoji"> element whose title is
// the emoji's shortcode; `key` becomes the element's React key.
const emojiToJsxSpan = (emoji, key) => {
    const props = {
        key,
        className: "mx_Emoji",
        title: unicodeToShortcode(emoji)
    };
    return /*#__PURE__*/_react.default.createElement("span", props, emoji);
};
/**
 * Wraps emojis in <span> to style them separately from the rest of the message.
 * The message is walked one grapheme segment at a time: every segment that
 * matches EMOJI_REGEX gets its own span, and runs of non-emoji segments are
 * merged into a single plain string.
 * @param {string} message the text to format
 * @param {boolean} isHtmlMessage whether the message contains HTML
 * @returns an empty array for an empty or missing message. Otherwise, if isHtmlMessage is true, an array of
 * strings (emoji as HTML span strings); if not, an array of React elements for emojis and plain strings for
 * everything else
 */
function formatEmojis(message, isHtmlMessage) {
    if (!message) {
        return [];
    }
    const wrapEmoji = isHtmlMessage ? emojiToHtmlSpan : emojiToJsxSpan;
    const parts = [];
    let pendingText = "";
    let emojiCount = 0;
    for (const { segment } of _strings.graphemeSegmenter.segment(message)) {
        if (!EMOJI_REGEX.test(segment)) {
            pendingText += segment;
            continue;
        }
        // Flush the text gathered so far before emitting the emoji
        if (pendingText) {
            parts.push(pendingText);
            pendingText = "";
        }
        // The running emoji count doubles as the React key
        parts.push(wrapEmoji(segment, emojiCount));
        emojiCount += 1;
    }
    if (pendingText) {
        parts.push(pendingText);
    }
    return parts;
}
/**
 * Analyses the content of an event and prepares the sanitised pieces needed to
 * render it.
 *
 * @param content the event content; `body`, `format` and `formatted_body` are read
 * @param {string[]} [highlights] optional search terms to highlight; entries containing "<" are ignored
 * @param opts options; `forComposerQuote`, `stripReplyFallback` and `highlightLink` are read
 * @returns an object with:
 *   - bodyHasEmoji: whether the body might contain emoji (fast check, false positives possible)
 *   - isHtmlMessage: whether the sanitised formatted body contains actual markup
 *   - strippedBody: the plain-text body, with the reply fallback removed if requested
 *   - safeBody: sanitised (and possibly highlighted) HTML, or undefined when there is
 *     neither a formatted body nor anything to highlight
 *   - isFormattedBody: whether the event carries an org.matrix.custom.html formatted body
 */
function analyseEvent(content, highlights, opts = {}) {
    const sanitize = _sanitizeHtml.default;
    const baseSanitizeParams = opts.forComposerQuote ? composerSanitizeHtmlParams : _Linkify.sanitizeHtmlParams;
    const isFormattedBody = content.format === "org.matrix.custom.html" && typeof content.formatted_body === "string";
    let isHtmlMessage = false;
    let safeBody; // safe, sanitised HTML, preferred over `strippedBody` which is fully plaintext

    // sanitizeHtml can hang if an unclosed HTML tag is thrown at it: a search
    // for `<foo` will make the browser crash. An alternative would be to
    // escape HTML special characters, but that would bring no additional
    // benefit as the highlighter does not work with those special chars.
    const safeHighlights = highlights?.filter(highlight => !highlight.includes("<")).map(highlight => sanitize(highlight, baseSanitizeParams));

    let formattedBody = typeof content.formatted_body === "string" ? content.formatted_body : null;
    const plainBody = typeof content.body === "string" ? content.body : "";
    if (opts.stripReplyFallback && formattedBody) formattedBody = (0, _Reply.stripHTMLReply)(formattedBody);
    const strippedBody = opts.stripReplyFallback ? (0, _Reply.stripPlainReply)(plainBody) : plainBody;
    const bodyHasEmoji = mightContainEmoji(isFormattedBody ? formattedBody : plainBody);
    const highlighter = safeHighlights?.length ? new HtmlHighlighter("mx_EventTile_searchHighlight", opts.highlightLink) : null;

    if (isFormattedBody) {
        // We sanitize the HTML whilst also highlighting its text nodes, to avoid accidentally trying
        // to highlight HTML tags themselves. However, this does mean that we don't highlight textnodes which
        // are interrupted by HTML tags - e.g. foo<span/>bar won't get highlighted by an attempt to search
        // for 'foobar'. Then again, the search query probably wouldn't work either.
        //
        // The textFilter goes onto a per-call copy of the options so that the
        // shared, module-level options object is never mutated.
        const sanitizeParams = highlighter ? {
            ...baseSanitizeParams,
            textFilter: safeText => highlighter.applyHighlights(safeText, safeHighlights).join("")
        } : baseSanitizeParams;
        safeBody = sanitize(formattedBody, sanitizeParams);

        // If the sanitised HTML is identical to its own text content there is no markup in it
        const phtml = new DOMParser().parseFromString(safeBody, "text/html");
        const isPlainText = phtml.body.innerHTML === phtml.body.textContent;
        isHtmlMessage = !isPlainText;

        if (isHtmlMessage && _SettingsStore.default.getValue("feature_latex_maths")) {
            // Replace each maths placeholder with its KaTeX rendering: block mode for divs, inline for spans
            [...phtml.querySelectorAll("div[data-mx-maths], span[data-mx-maths]")].forEach(e => {
                e.outerHTML = _katex.default.renderToString((0, _htmlEntities.decode)(e.getAttribute("data-mx-maths")), {
                    throwOnError: false,
                    displayMode: e.tagName === "DIV",
                    output: "htmlAndMathml"
                });
            });
            safeBody = phtml.body.innerHTML;
        }
    } else if (highlighter) {
        // Plain-text message: escape it so that it can be treated as HTML, then highlight
        safeBody = highlighter.applyHighlights((0, _escapeHtml.default)(plainBody), safeHighlights).join("");
    }

    return {
        bodyHasEmoji,
        isHtmlMessage,
        strippedBody,
        safeBody,
        isFormattedBody
    };
}
/**
 * Renders an event body as a React <div> (key "body", dir "auto").
 *
 * @param content 'content' of the MatrixEvent
 * @param highlights optional list of words to highlight
 * @param opts options, passed through to bodyToNode
 * @param ref React ref to attach to the returned element
 * @returns a div whose inner HTML is the formatted body when there is one,
 *     otherwise a div containing the emoji-wrapped elements or the plain body
 */
function bodyToDiv(content, highlights, opts = {}, ref) {
    const node = bodyToNode(content, highlights, opts);
    if (node.formattedBody) {
        return /*#__PURE__*/_react.default.createElement("div", {
            key: "body",
            ref: ref,
            className: node.className,
            dangerouslySetInnerHTML: {
                __html: node.formattedBody
            },
            dir: "auto"
        });
    }
    const children = node.emojiBodyElements || node.strippedBody;
    return /*#__PURE__*/_react.default.createElement("div", {
        key: "body",
        ref: ref,
        className: node.className,
        dir: "auto"
    }, children);
}
/**
 * Renders an event body as a React <span> (key "body").
 *
 * @param content 'content' of the MatrixEvent
 * @param highlights optional list of words to highlight
 * @param opts options, passed through to bodyToNode
 * @param ref React ref to attach to the returned element
 * @param includeDir whether to set dir="auto" on the span (default true)
 * @returns a span whose inner HTML is the formatted body when there is one,
 *     otherwise a span containing the emoji-wrapped elements or the plain body
 */
function bodyToSpan(content, highlights, opts = {}, ref, includeDir = true) {
    const node = bodyToNode(content, highlights, opts);
    const dir = includeDir ? "auto" : undefined;
    if (node.formattedBody) {
        return /*#__PURE__*/_react.default.createElement("span", {
            key: "body",
            ref: ref,
            className: node.className,
            dangerouslySetInnerHTML: {
                __html: node.formattedBody
            },
            dir: dir
        });
    }
    const children = node.emojiBodyElements || node.strippedBody;
    return /*#__PURE__*/_react.default.createElement("span", {
        key: "body",
        ref: ref,
        className: node.className,
        dir: dir
    }, children);
}
/**
 * Works out everything needed to render an event body: the class names, the
 * HTML to inject (if any) and the emoji-wrapped elements for plain bodies.
 *
 * @param content 'content' of the MatrixEvent
 * @param highlights optional list of words to highlight
 * @param opts options; `disableBigEmoji` is read here, the rest is passed to analyseEvent
 * @returns an object with:
 *   - strippedBody: the plain-text body
 *   - formattedBody: sanitised HTML to inject, or undefined when there is none
 *   - emojiBodyElements: emoji-wrapped elements for a plain body containing emoji, otherwise undefined
 *   - className: the class names for the body element
 */
function bodyToNode(content, highlights, opts = {}) {
    const eventInfo = analyseEvent(content, highlights, opts);
    let emojiBody = false;
    if (!opts.disableBigEmoji && eventInfo.bodyHasEmoji) {
        const contentBody = eventInfo.safeBody ?? eventInfo.strippedBody;
        let contentBodyTrimmed = contentBody !== undefined ? contentBody.trim() : "";
        // Remove zero width spaces and other whitespace in the body text. This
        // ensures that emojis with spaces in between are still counted as
        // purely emoji messages.
        contentBodyTrimmed = contentBodyTrimmed.replace(EMOJI_SEPARATOR_REGEX, "");
        const match = BIGEMOJI_REGEX.exec(contentBodyTrimmed);
        // Event content is untrusted: formatted_body may be missing or not a
        // string at all, in which case there is nothing to inspect for links.
        const rawFormattedBody = content.formatted_body;
        emojiBody = match?.[0]?.length === contentBodyTrimmed.length && (
        // Prevent user pills expanding for users with only emoji in
        // their username. Permalinks (links in pills) can be any URL
        // now, so we just check for an HTTP-looking thing.
        eventInfo.strippedBody === eventInfo.safeBody ||
        // replies have the html fallbacks, account for that here
        typeof rawFormattedBody !== "string" || !rawFormattedBody.includes("http:") && !rawFormattedBody.includes("https:"));
    }
    const className = (0, _classnames.default)({
        "mx_EventTile_body": true,
        "mx_EventTile_bigEmoji": emojiBody,
        "markdown-body": eventInfo.isHtmlMessage && !emojiBody,
        // Override the global `notranslate` class set by the top-level `matrixchat` div.
        "translate": true
    });
    let formattedBody = eventInfo.safeBody;
    if (eventInfo.isFormattedBody && eventInfo.bodyHasEmoji && eventInfo.safeBody) {
        // This has to be done after the emojiBody check as to not break big emoji on replies
        formattedBody = formatEmojis(eventInfo.safeBody, true).join("");
    }
    let emojiBodyElements;
    if (!eventInfo.safeBody && eventInfo.bodyHasEmoji) {
        emojiBodyElements = formatEmojis(eventInfo.strippedBody, false);
    }
    return {
        strippedBody: eventInfo.strippedBody,
        formattedBody,
        emojiBodyElements,
        className
    };
}
/**
 * Turn a matrix event body into an html string
 *
 * content: 'content' of the MatrixEvent
 *
 * highlights: optional list of words to highlight, ordered by longest word first
 *
 * opts.highlightLink: optional href to add to highlighted words
 * opts.stripReplyFallback: optional argument specifying the event is a reply and so fallback needs removing
 * opts.forComposerQuote: optional param to lessen the url rewriting done by sanitization, for quoting into composer
 *
 * Returns the sanitised HTML body (with emoji wrapped in spans when it is a
 * formatted body that may contain emoji), or the plain body when no sanitised
 * HTML was produced.
 */
function bodyToHtml(content, highlights, opts = {}) {
    const {
        safeBody,
        strippedBody,
        isFormattedBody,
        bodyHasEmoji
    } = analyseEvent(content, highlights, opts);
    const shouldWrapEmoji = isFormattedBody && bodyHasEmoji && safeBody;
    const formattedBody = shouldWrapEmoji ? formatEmojis(safeBody, true).join("") : safeBody;
    return formattedBody ?? strippedBody;
}
/**
 * Turn a room topic into a React node
 * @param topic plain text topic
 * @param htmlTopic optional html topic; ignored unless the "feature_html_topic" setting is on
 * @param ref React ref to attach to any React components returned
 * @param allowExtendedHtml whether to sanitise with the full set of allowed tags rather than the reduced
 * topic set
 * @return A <span> holding the topic, or null when there is nothing to show.
 */
function topicToHtml(topic, htmlTopic, ref, allowExtendedHtml = false) {
    const htmlTopicsEnabled = _SettingsStore.default.getValue("feature_html_topic");
    const usableHtmlTopic = htmlTopicsEnabled ? htmlTopic : undefined;

    let isFormattedTopic = Boolean(usableHtmlTopic);
    let topicHasEmoji = false;
    let safeTopic = "";
    try {
        topicHasEmoji = mightContainEmoji(isFormattedTopic ? usableHtmlTopic : topic);
        if (isFormattedTopic) {
            const sanitizeParams = allowExtendedHtml ? _Linkify.sanitizeHtmlParams : topicSanitizeHtmlParams;
            safeTopic = (0, _sanitizeHtml.default)(usableHtmlTopic, sanitizeParams);
            if (topicHasEmoji) {
                safeTopic = formatEmojis(safeTopic, true).join("");
            }
        }
    } catch {
        // Fall back to plain-text topic
        isFormattedTopic = false;
    }

    if (isFormattedTopic) {
        if (!safeTopic) {
            return null;
        }
        return /*#__PURE__*/_react.default.createElement("span", {
            ref: ref,
            dangerouslySetInnerHTML: {
                __html: safeTopic
            },
            dir: "auto"
        });
    }

    // Plain-text topic: wrap emoji in elements when there might be any
    const emojiBodyElements = topicHasEmoji ? formatEmojis(topic, false) : undefined;
    if (!emojiBodyElements && !topic) {
        return null;
    }
    return /*#__PURE__*/_react.default.createElement("span", {
        ref: ref,
        dir: "auto"
    }, emojiBodyElements || topic);
}
// Node names (other than DIV, which is handled separately) that count as block elements.
const BLOCK_NODE_NAMES = new Set([
    "H1", "H2", "H3", "H4", "H5", "H6",
    "PRE", "BLOCKQUOTE", "P",
    "UL", "OL", "LI",
    "HR",
    "TABLE", "THEAD", "TBODY", "TR", "TH", "TD"
]);
/**
 * Returns if a node is a block element or not.
 * Only takes html nodes into account that are allowed in matrix messages.
 * A DIV counts as a block element unless it carries a data-mx-maths attribute.
 *
 * @param {Node} node
 * @returns {bool}
 */
function checkBlockNode(node) {
    const name = node.nodeName;
    if (name === "DIV") {
        // don't treat math nodes as block nodes for deserializing
        return !node.hasAttribute("data-mx-maths");
    }
    return BLOCK_NODE_NAMES.has(name);
}
//# sourceMappingURL=data:application/json;charset=utf-8;base64,{"version":3,"names":["_react","_interopRequireDefault","require","_sanitizeHtml","_classnames","_katex","_htmlEntities","_escapeHtml","_emojibaseBindings","_SettingsStore","_Reply","_UrlUtils","_Linkify","_strings","ownKeys","e","r","t","Object","keys","getOwnPropertySymbols","o","filter","getOwnPropertyDescriptor","enumerable","push","apply","_objectSpread","arguments","length","forEach","_defineProperty2","default","getOwnPropertyDescriptors","defineProperties","defineProperty","SURROGATE_PAIR_PATTERN","SYMBOL_PATTERN","EMOJI_SEPARATOR_REGEX","EMOJI_REGEX","exports","RegExp","_e","BIGEMOJI_REGEX","source","mightContainEmoji","str","test","unicodeToShortcode","char","shortcodes","getEmojiFromUnicode","sanitizedHtmlNode","insaneHtml","saneHtml","sanitizeHtml","sanitizeHtmlParams","createElement","dangerouslySetInnerHTML","__html","dir","getHtmlText","allowedTags","allowedAttributes","selfClosing","allowedSchemes","disallowedTagsMode","isUrlPermitted","inputUrl","PERMITTED_URL_SCHEMES","includes","URL","protocol","slice","composerSanitizeHtmlParams","transformTags","topicSanitizeHtmlParams","BaseHighlighter","constructor","highlightClass","highlightLink","applyHighlights","safeSnippet","safeHighlights","lastOffset","offset","nodes","safeHighlight","toLowerCase","indexOf","subSnippet","substring","concat","applySubHighlights","endOffset","processSnippet","undefined","HtmlHighlighter","snippet","highlight","span","encodeURI","emojiToHtmlSpan","emoji","emojiToJsxSpan","key","className","title","formatEmojis","message","isHtmlMessage","emojiToSpan","result","text","data","graphemeSegmenter","segment","analyseEvent","content","highlights","opts","sanitizeParams","forComposerQuote","isFormattedBody","format","formatted_body","bodyHasEmoji","safeBody","map","formattedBody","plainBody","body","stripReplyFallback","stripHTMLReply","strippedBody","stripPlainReply","highlighter","textFilter","safeText","join","pht
ml","DOMParser","parseFromString","isPlainText","innerHTML","textContent","SettingsStore","getValue","querySelectorAll","outerHTML","katex","renderToString","decode","getAttribute","throwOnError","displayMode","tagName","output","escapeHtml","bodyToDiv","ref","emojiBodyElements","bodyToNode","bodyToSpan","includeDir","eventInfo","emojiBody","disableBigEmoji","contentBody","contentBodyTrimmed","trim","replace","match","exec","classNames","bodyToHtml","topicToHtml","topic","htmlTopic","allowExtendedHtml","isFormattedTopic","topicHasEmoji","safeTopic","checkBlockNode","node","nodeName","hasAttribute"],"sources":["../src/HtmlUtils.tsx"],"sourcesContent":["/*\nCopyright 2024 New Vector Ltd.\nCopyright 2019 Michael Telatynski <7t3chguy@gmail.com>\nCopyright 2019 The Matrix.org Foundation C.I.C.\nCopyright 2017, 2018 New Vector Ltd\nCopyright 2015, 2016 OpenMarket Ltd\n\nSPDX-License-Identifier: AGPL-3.0-only OR GPL-3.0-only\nPlease see LICENSE files in the repository root for full details.\n*/\n\nimport React, { LegacyRef, ReactNode } from \"react\";\nimport sanitizeHtml from \"sanitize-html\";\nimport classNames from \"classnames\";\nimport katex from \"katex\";\nimport { decode } from \"html-entities\";\nimport { IContent } from \"matrix-js-sdk/src/matrix\";\nimport { Optional } from \"matrix-events-sdk\";\nimport escapeHtml from \"escape-html\";\nimport { getEmojiFromUnicode } from \"@matrix-org/emojibase-bindings\";\n\nimport { IExtendedSanitizeOptions } from \"./@types/sanitize-html\";\nimport SettingsStore from \"./settings/SettingsStore\";\nimport { stripHTMLReply, stripPlainReply } from \"./utils/Reply\";\nimport { PERMITTED_URL_SCHEMES } from \"./utils/UrlUtils\";\nimport { sanitizeHtmlParams, transformTags } from \"./Linkify\";\nimport { graphemeSegmenter } from \"./utils/strings\";\n\nexport { Linkify, linkifyElement, linkifyAndSanitizeHtml } from \"./Linkify\";\n\n// Anything outside the basic multilingual plane will be a surrogate pair\nconst 
SURROGATE_PAIR_PATTERN = /([\\ud800-\\udbff])([\\udc00-\\udfff])/;\n// And there a bunch more symbol characters that emojibase has within the\n// BMP, so this includes the ranges from 'letterlike symbols' to\n// 'miscellaneous symbols and arrows' which should catch all of them\n// (with plenty of false positives, but that's OK)\nconst SYMBOL_PATTERN = /([\\u2100-\\u2bff])/;\n\n// Regex pattern for non-emoji characters that can appear in an \"all-emoji\" message\n// (Zero-Width Space, other whitespace)\nconst EMOJI_SEPARATOR_REGEX = /[\\u200B\\s]/g;\n\n// Regex for emoji. This includes any RGI_Emoji sequence followed by an optional\n// emoji presentation VS (U+FE0F), but not those sequences that are followed by\n// a text presentation VS (U+FE0E). We also count lone regional indicators\n// (U+1F1E6-U+1F1FF). Technically this regex produces false negatives for emoji\n// followed by U+FE0E when the emoji doesn't have a text variant, but in\n// practice this doesn't matter.\nexport const EMOJI_REGEX = (() => {\n    try {\n        // Per our support policy, v mode is available to us, but we still don't\n        // want the app to completely crash on older platforms. 
We use the\n        // constructor here to avoid a syntax error on such platforms.\n        return new RegExp(\"\\\\p{RGI_Emoji}(?!\\\\uFE0E)(?:(?<!\\\\uFE0F)\\\\uFE0F)?|[\\\\u{1f1e6}-\\\\u{1f1ff}]\", \"v\");\n    } catch (_e) {\n        // v mode not supported; fall back to matching nothing\n        return /(?!)/;\n    }\n})();\n\nconst BIGEMOJI_REGEX = (() => {\n    try {\n        return new RegExp(`^(${EMOJI_REGEX.source})+$`, \"iv\");\n    } catch (_e) {\n        // Fall back, just like for EMOJI_REGEX\n        return /(?!)/;\n    }\n})();\n\n/*\n * Return true if the given string contains emoji\n * Uses a much, much simpler regex than emojibase's so will give false\n * positives, but useful for fast-path testing strings to see if they\n * need emojification.\n */\nfunction mightContainEmoji(str?: string): boolean {\n    return !!str && (SURROGATE_PAIR_PATTERN.test(str) || SYMBOL_PATTERN.test(str));\n}\n\n/**\n * Returns the shortcode for an emoji character.\n *\n * @param {String} char The emoji character\n * @return {String} The shortcode (such as :thumbup:)\n */\nexport function unicodeToShortcode(char: string): string {\n    const shortcodes = getEmojiFromUnicode(char)?.shortcodes;\n    return shortcodes?.length ? 
`:${shortcodes[0]}:` : \"\";\n}\n\n/*\n * Given an untrusted HTML string, return a React node with an sanitized version\n * of that HTML.\n */\nexport function sanitizedHtmlNode(insaneHtml: string): ReactNode {\n    const saneHtml = sanitizeHtml(insaneHtml, sanitizeHtmlParams);\n\n    return <div dangerouslySetInnerHTML={{ __html: saneHtml }} dir=\"auto\" />;\n}\n\nexport function getHtmlText(insaneHtml: string): string {\n    return sanitizeHtml(insaneHtml, {\n        allowedTags: [],\n        allowedAttributes: {},\n        selfClosing: [],\n        allowedSchemes: [],\n        disallowedTagsMode: \"discard\",\n    });\n}\n\n/**\n * Tests if a URL from an untrusted source may be safely put into the DOM\n * The biggest threat here is javascript: URIs.\n * Note that the HTML sanitiser library has its own internal logic for\n * doing this, to which we pass the same list of schemes. This is used in\n * other places we need to sanitise URLs.\n * @return true if permitted, otherwise false\n */\nexport function isUrlPermitted(inputUrl: string): boolean {\n    try {\n        // URL parser protocol includes the trailing colon\n        return PERMITTED_URL_SCHEMES.includes(new URL(inputUrl).protocol.slice(0, -1));\n    } catch (e) {\n        return false;\n    }\n}\n\n// this is the same as the above except with less rewriting\nconst composerSanitizeHtmlParams: IExtendedSanitizeOptions = {\n    ...sanitizeHtmlParams,\n    transformTags: {\n        \"code\": transformTags[\"code\"],\n        \"*\": transformTags[\"*\"],\n    },\n};\n\n// reduced set of allowed tags to avoid turning topics into Myspace\nconst topicSanitizeHtmlParams: IExtendedSanitizeOptions = {\n    ...sanitizeHtmlParams,\n    allowedTags: [\n        \"font\", // custom to matrix for IRC-style font coloring\n        \"del\", // for markdown\n        \"s\",\n        \"a\",\n        \"sup\",\n        \"sub\",\n        \"b\",\n        \"i\",\n        \"u\",\n        \"strong\",\n        \"em\",\n        
\"strike\",\n        \"br\",\n        \"div\",\n        \"span\",\n    ],\n};\n\nabstract class BaseHighlighter<T extends React.ReactNode> {\n    public constructor(\n        public highlightClass: string,\n        public highlightLink?: string,\n    ) {}\n\n    /**\n     * Apply the highlights to a section of text\n     *\n     * @param {string} safeSnippet The snippet of text to apply the highlights\n     *     to. This input must be sanitised as it will be treated as HTML.\n     * @param {string[]} safeHighlights A list of substrings to highlight,\n     *     sorted by descending length.\n     *\n     * returns a list of results (strings for HtmlHighligher, react nodes for\n     * TextHighlighter).\n     */\n    public applyHighlights(safeSnippet: string, safeHighlights: string[]): T[] {\n        let lastOffset = 0;\n        let offset: number;\n        let nodes: T[] = [];\n\n        const safeHighlight = safeHighlights[0];\n        while ((offset = safeSnippet.toLowerCase().indexOf(safeHighlight.toLowerCase(), lastOffset)) >= 0) {\n            // handle preamble\n            if (offset > lastOffset) {\n                const subSnippet = safeSnippet.substring(lastOffset, offset);\n                nodes = nodes.concat(this.applySubHighlights(subSnippet, safeHighlights));\n            }\n\n            // do highlight. 
use the original string rather than safeHighlight\n            // to preserve the original casing.\n            const endOffset = offset + safeHighlight.length;\n            nodes.push(this.processSnippet(safeSnippet.substring(offset, endOffset), true));\n\n            lastOffset = endOffset;\n        }\n\n        // handle postamble\n        if (lastOffset !== safeSnippet.length) {\n            const subSnippet = safeSnippet.substring(lastOffset, undefined);\n            nodes = nodes.concat(this.applySubHighlights(subSnippet, safeHighlights));\n        }\n        return nodes;\n    }\n\n    private applySubHighlights(safeSnippet: string, safeHighlights: string[]): T[] {\n        if (safeHighlights[1]) {\n            // recurse into this range to check for the next set of highlight matches\n            return this.applyHighlights(safeSnippet, safeHighlights.slice(1));\n        } else {\n            // no more highlights to be found, just return the unhighlighted string\n            return [this.processSnippet(safeSnippet, false)];\n        }\n    }\n\n    protected abstract processSnippet(snippet: string, highlight: boolean): T;\n}\n\nclass HtmlHighlighter extends BaseHighlighter<string> {\n    /* highlight the given snippet if required\n     *\n     * snippet: content of the span; must have been sanitised\n     * highlight: true to highlight as a search match\n     *\n     * returns an HTML string\n     */\n    protected processSnippet(snippet: string, highlight: boolean): string {\n        if (!highlight) {\n            // nothing required here\n            return snippet;\n        }\n\n        let span = `<span class=\"${this.highlightClass}\">${snippet}</span>`;\n\n        if (this.highlightLink) {\n            span = `<a href=\"${encodeURI(this.highlightLink)}\">${span}</a>`;\n        }\n        return span;\n    }\n}\n\nconst emojiToHtmlSpan = (emoji: string): string =>\n    `<span class='mx_Emoji' 
title='${unicodeToShortcode(emoji)}'>${emoji}</span>`;\nconst emojiToJsxSpan = (emoji: string, key: number): JSX.Element => (\n    <span key={key} className=\"mx_Emoji\" title={unicodeToShortcode(emoji)}>\n        {emoji}\n    </span>\n);\n\n/**\n * Wraps emojis in <span> to style them separately from the rest of message. Consecutive emojis (and modifiers) are wrapped\n * in the same <span>.\n * @param {string} message the text to format\n * @param {boolean} isHtmlMessage whether the message contains HTML\n * @returns if isHtmlMessage is true, returns an array of strings, otherwise return an array of React Elements for emojis\n * and plain text for everything else\n */\nexport function formatEmojis(message: string | undefined, isHtmlMessage?: false): JSX.Element[];\nexport function formatEmojis(message: string | undefined, isHtmlMessage: true): string[];\nexport function formatEmojis(message: string | undefined, isHtmlMessage?: boolean): (JSX.Element | string)[] {\n    const emojiToSpan = isHtmlMessage ? 
emojiToHtmlSpan : emojiToJsxSpan;\n    const result: (JSX.Element | string)[] = [];\n    if (!message) return result;\n\n    let text = \"\";\n    let key = 0;\n\n    for (const data of graphemeSegmenter.segment(message)) {\n        if (EMOJI_REGEX.test(data.segment)) {\n            if (text) {\n                result.push(text);\n                text = \"\";\n            }\n            result.push(emojiToSpan(data.segment, key));\n            key++;\n        } else {\n            text += data.segment;\n        }\n    }\n    if (text) {\n        result.push(text);\n    }\n    return result;\n}\n\ninterface EventAnalysis {\n    bodyHasEmoji: boolean;\n    isHtmlMessage: boolean;\n    strippedBody: string;\n    safeBody?: string; // safe, sanitised HTML, preferred over `strippedBody` which is fully plaintext\n    isFormattedBody: boolean;\n}\n\nexport interface EventRenderOpts {\n    highlightLink?: string;\n    disableBigEmoji?: boolean;\n    stripReplyFallback?: boolean;\n    forComposerQuote?: boolean;\n}\n\nfunction analyseEvent(content: IContent, highlights: Optional<string[]>, opts: EventRenderOpts = {}): EventAnalysis {\n    let sanitizeParams = sanitizeHtmlParams;\n    if (opts.forComposerQuote) {\n        sanitizeParams = composerSanitizeHtmlParams;\n    }\n\n    try {\n        const isFormattedBody =\n            content.format === \"org.matrix.custom.html\" && typeof content.formatted_body === \"string\";\n        let bodyHasEmoji = false;\n        let isHtmlMessage = false;\n\n        let safeBody: string | undefined; // safe, sanitised HTML, preferred over `strippedBody` which is fully plaintext\n\n        // sanitizeHtml can hang if an unclosed HTML tag is thrown at it\n        // A search for `<foo` will make the browser crash an alternative would be to escape HTML special characters\n        // but that would bring no additional benefit as the highlighter does not work with those special chars\n        const safeHighlights = highlights\n            
?.filter((highlight: string): boolean => !highlight.includes(\"<\"))\n            .map((highlight: string): string => sanitizeHtml(highlight, sanitizeParams));\n\n        let formattedBody = typeof content.formatted_body === \"string\" ? content.formatted_body : null;\n        const plainBody = typeof content.body === \"string\" ? content.body : \"\";\n\n        if (opts.stripReplyFallback && formattedBody) formattedBody = stripHTMLReply(formattedBody);\n        const strippedBody = opts.stripReplyFallback ? stripPlainReply(plainBody) : plainBody;\n        bodyHasEmoji = mightContainEmoji(isFormattedBody ? formattedBody! : plainBody);\n\n        const highlighter = safeHighlights?.length\n            ? new HtmlHighlighter(\"mx_EventTile_searchHighlight\", opts.highlightLink)\n            : null;\n\n        if (isFormattedBody) {\n            if (highlighter) {\n                // XXX: We sanitize the HTML whilst also highlighting its text nodes, to avoid accidentally trying\n                // to highlight HTML tags themselves. However, this does mean that we don't highlight textnodes which\n                // are interrupted by HTML tags (not that we did before) - e.g. foo<span/>bar won't get highlighted\n                // by an attempt to search for 'foobar'.  
Then again, the search query probably wouldn't work either\n                // XXX: hacky bodge to temporarily apply a textFilter to the sanitizeParams structure.\n                sanitizeParams.textFilter = function (safeText) {\n                    return highlighter.applyHighlights(safeText, safeHighlights!).join(\"\");\n                };\n            }\n\n            safeBody = sanitizeHtml(formattedBody!, sanitizeParams);\n            const phtml = new DOMParser().parseFromString(safeBody, \"text/html\");\n            const isPlainText = phtml.body.innerHTML === phtml.body.textContent;\n            isHtmlMessage = !isPlainText;\n\n            if (isHtmlMessage && SettingsStore.getValue(\"feature_latex_maths\")) {\n                [...phtml.querySelectorAll<HTMLElement>(\"div[data-mx-maths], span[data-mx-maths]\")].forEach((e) => {\n                    e.outerHTML = katex.renderToString(decode(e.getAttribute(\"data-mx-maths\")), {\n                        throwOnError: false,\n                        displayMode: e.tagName == \"DIV\",\n                        output: \"htmlAndMathml\",\n                    });\n                });\n                safeBody = phtml.body.innerHTML;\n            }\n        } else if (highlighter) {\n            safeBody = highlighter.applyHighlights(escapeHtml(plainBody), safeHighlights!).join(\"\");\n        }\n\n        return { bodyHasEmoji, isHtmlMessage, strippedBody, safeBody, isFormattedBody };\n    } finally {\n        delete sanitizeParams.textFilter;\n    }\n}\n\nexport function bodyToDiv(\n    content: IContent,\n    highlights: Optional<string[]>,\n    opts: EventRenderOpts = {},\n    ref?: React.Ref<HTMLDivElement>,\n): ReactNode {\n    const { strippedBody, formattedBody, emojiBodyElements, className } = bodyToNode(content, highlights, opts);\n\n    return formattedBody ? 
(\n        <div\n            key=\"body\"\n            ref={ref}\n            className={className}\n            dangerouslySetInnerHTML={{ __html: formattedBody }}\n            dir=\"auto\"\n        />\n    ) : (\n        <div key=\"body\" ref={ref} className={className} dir=\"auto\">\n            {emojiBodyElements || strippedBody}\n        </div>\n    );\n}\n\nexport function bodyToSpan(\n    content: IContent,\n    highlights: Optional<string[]>,\n    opts: EventRenderOpts = {},\n    ref?: React.Ref<HTMLSpanElement>,\n    includeDir = true,\n): ReactNode {\n    const { strippedBody, formattedBody, emojiBodyElements, className } = bodyToNode(content, highlights, opts);\n\n    return formattedBody ? (\n        <span\n            key=\"body\"\n            ref={ref}\n            className={className}\n            dangerouslySetInnerHTML={{ __html: formattedBody }}\n            dir={includeDir ? \"auto\" : undefined}\n        />\n    ) : (\n        <span key=\"body\" ref={ref} className={className} dir={includeDir ? \"auto\" : undefined}>\n            {emojiBodyElements || strippedBody}\n        </span>\n    );\n}\n\ninterface BodyToNodeReturn {\n    strippedBody: string;\n    formattedBody?: string;\n    emojiBodyElements: JSX.Element[] | undefined;\n    className: string;\n}\n\nfunction bodyToNode(content: IContent, highlights: Optional<string[]>, opts: EventRenderOpts = {}): BodyToNodeReturn {\n    const eventInfo = analyseEvent(content, highlights, opts);\n\n    let emojiBody = false;\n    if (!opts.disableBigEmoji && eventInfo.bodyHasEmoji) {\n        const contentBody = eventInfo.safeBody ?? eventInfo.strippedBody;\n        let contentBodyTrimmed = contentBody !== undefined ? contentBody.trim() : \"\";\n\n        // Remove zero width joiner, zero width spaces and other spaces in body\n        // text. 
This ensures that emojis with spaces in between or that are made\n        // up of multiple unicode characters are still counted as purely emoji\n        // messages.\n        contentBodyTrimmed = contentBodyTrimmed.replace(EMOJI_SEPARATOR_REGEX, \"\");\n\n        const match = BIGEMOJI_REGEX.exec(contentBodyTrimmed);\n        emojiBody =\n            match?.[0]?.length === contentBodyTrimmed.length &&\n            // Prevent user pills expanding for users with only emoji in\n            // their username. Permalinks (links in pills) can be any URL\n            // now, so we just check for an HTTP-looking thing.\n            (eventInfo.strippedBody === eventInfo.safeBody || // replies have the html fallbacks, account for that here\n                content.formatted_body === undefined ||\n                (!content.formatted_body.includes(\"http:\") && !content.formatted_body.includes(\"https:\")));\n    }\n\n    const className = classNames({\n        \"mx_EventTile_body\": true,\n        \"mx_EventTile_bigEmoji\": emojiBody,\n        \"markdown-body\": eventInfo.isHtmlMessage && !emojiBody,\n        // Override the global `notranslate` class set by the top-level `matrixchat` div.\n        \"translate\": true,\n    });\n\n    let formattedBody = eventInfo.safeBody;\n    if (eventInfo.isFormattedBody && eventInfo.bodyHasEmoji && eventInfo.safeBody) {\n        // This has to be done after the emojiBody check as to not break big emoji on replies\n        formattedBody = formatEmojis(eventInfo.safeBody, true).join(\"\");\n    }\n\n    let emojiBodyElements: JSX.Element[] | undefined;\n    if (!eventInfo.safeBody && eventInfo.bodyHasEmoji) {\n        emojiBodyElements = formatEmojis(eventInfo.strippedBody, false) as JSX.Element[];\n    }\n\n    return { strippedBody: eventInfo.strippedBody, formattedBody, emojiBodyElements, className };\n}\n\n/**\n * Turn a matrix event body into html\n *\n * content: 'content' of the MatrixEvent\n *\n * highlights: optional list of 
words to highlight, ordered by longest word first\n *\n * opts.highlightLink: optional href to add to highlighted words\n * opts.disableBigEmoji: optional argument to disable the big emoji 