@lexical/markdown
Version:
This package contains Markdown helpers and functionality for Lexical.
1,322 lines (1,269 loc) • 65 kB
JavaScript
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
'use strict';
var lexical = require('lexical');
var list = require('@lexical/list');
var richText = require('@lexical/rich-text');
var utils = require('@lexical/utils');
var code = require('@lexical/code');
var link = require('@lexical/link');
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
/**
 * Groups the items of an iterable into arrays, keyed by a computed value.
 *
 * @param list - Iterable of items to group.
 * @param callback - Returns the grouping key for an item. Items whose key is
 *   falsy are skipped.
 * @returns A plain object mapping each key to the items that produced it, in
 *   iteration order.
 */
function indexBy(list, callback) {
  const index = {};
  for (const item of list) {
    const key = callback(item);
    if (!key) {
      continue;
    }
    // Only own properties count as existing groups. A plain truthiness check
    // would also see inherited members such as `constructor` or `toString`
    // and then fail when calling `.push` on them.
    if (Object.prototype.hasOwnProperty.call(index, key)) {
      index[key].push(item);
    } else {
      index[key] = [item];
    }
  }
  return index;
}
/**
 * Buckets a flat list of transformers by their `type` field.
 *
 * @param transformers - Transformers carrying a `type` of `element`,
 *   `multiline-element`, `text-format` or `text-match`.
 * @returns An object with one array per known type; a type with no
 *   transformers yields an empty array.
 */
function transformersByType(transformers) {
  const {
    element = [],
    'multiline-element': multilineElement = [],
    'text-format': textFormat = [],
    'text-match': textMatch = []
  } = indexBy(transformers, transformer => transformer.type);
  return {
    element,
    multilineElement,
    textFormat,
    textMatch
  };
}
// Matches one ASCII punctuation character (the ranges !-/, :-@, [-` and {-~)
// or one whitespace character.
const PUNCTUATION_OR_SPACE = /[!-/:-@[-`{-~\s]/;
// Matches a single space, tab, line feed, carriage return or form feed.
const WHITESPACE = /[ \t\n\r\f]/;
// Matches a single ASCII punctuation character; the set is listed explicitly.
const PUNCTUATION = /[!"#$%&'()*+,\-./:;<=>?@[\]^_`{|}~]/;
// Matches a line that is empty or consists of at most three whitespace characters.
const MARKDOWN_EMPTY_LINE_REG_EXP = /^\s{0,3}$/;
/**
 * Tells whether a node is a paragraph that counts as an empty markdown line.
 *
 * A paragraph qualifies when it has no children, or when its only child is a
 * text node whose content matches MARKDOWN_EMPTY_LINE_REG_EXP.
 *
 * @param node - Node to inspect.
 * @returns true for an empty paragraph, false for anything else.
 */
function isEmptyParagraph(node) {
  if (!lexical.$isParagraphNode(node)) {
    return false;
  }
  const onlyChild = node.getFirstChild();
  if (onlyChild == null) {
    return true;
  }
  if (node.getChildrenSize() !== 1 || !lexical.$isTextNode(onlyChild)) {
    return false;
  }
  return MARKDOWN_EMPTY_LINE_REG_EXP.test(onlyChild.getTextContent());
}
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
/**
 * Builds a function that renders a markdown string from Lexical nodes.
 *
 * @param transformers - Transformers to use; they are grouped by type first.
 * @param shouldPreserveNewLines - When false (default), consecutive non-empty
 *   top-level blocks are separated by a blank line in the output.
 * @returns A function taking an optional node. It exports that node's
 *   children, or the root's children when no node is given, and returns the
 *   markdown string.
 */
function createMarkdownExport(transformers, shouldPreserveNewLines = false) {
  const {
    element,
    multilineElement,
    textFormat,
    textMatch
  } = transformersByType(transformers);
  const elementTransformers = [...multilineElement, ...element];
  // Only single-format transformers are used for export: a combined tag such
  // as *** (bold + italic) is dropped in favour of separate ** and *.
  // Transformers whose format includes 'code' are moved to the end so that the
  // other format tags are applied before the code tag; the code format
  // prevents any further formatting once applied.
  const codeRank = transformer => Number(transformer.format.includes('code'));
  const textFormatTransformers = textFormat
    .filter(transformer => transformer.format.length === 1)
    .sort((a, b) => codeRank(a) - codeRank(b));
  const isNewlineDelimited = !shouldPreserveNewLines;
  return node => {
    const children = (node || lexical.$getRoot()).getChildren();
    const output = [];
    for (const [i, child] of children.entries()) {
      const result = exportTopLevelElements(child, elementTransformers, textFormatTransformers, textMatch);
      if (result == null) {
        continue;
      }
      // Prefix a line break when this block and the previous one both carry
      // content, e.g. ["hello", "world"] -> ["hello", "\nworld"].
      const needsLeadingBreak = isNewlineDelimited && i > 0 && !isEmptyParagraph(child) && !isEmptyParagraph(children[i - 1]);
      output.push(needsLeadingBreak ? `\n${result}` : result);
    }
    // Joining with "\n" puts consecutive content blocks "\n\n" apart, while
    // each empty paragraph renders as a single newline.
    // E.g. ["hello", "", "", "hi", "\nworld"] -> "hello\n\n\nhi\n\nworld"
    return output.join('\n');
  };
}
/**
 * Exports a single top-level node to markdown.
 *
 * Element transformers are tried in order and the first non-null result wins.
 * Without a match, element nodes fall back to exporting their children and
 * decorator nodes to their text content.
 *
 * @param node - Top-level node to export.
 * @param elementTransformers - Element and multiline-element transformers.
 * @param textTransformersIndex - Text format transformers passed on to exportChildren.
 * @param textMatchTransformers - Text match transformers passed on to exportChildren.
 * @returns The markdown for the node, or null when it cannot be exported.
 */
function exportTopLevelElements(node, elementTransformers, textTransformersIndex, textMatchTransformers) {
  const exportNodeChildren = target => exportChildren(target, textTransformersIndex, textMatchTransformers);
  for (const transformer of elementTransformers) {
    if (transformer.export) {
      const exported = transformer.export(node, exportNodeChildren);
      if (exported != null) {
        return exported;
      }
    }
  }
  if (lexical.$isElementNode(node)) {
    return exportNodeChildren(node);
  }
  return lexical.$isDecoratorNode(node) ? node.getTextContent() : null;
}
/**
 * Exports the children of a node as one markdown string.
 *
 * @param node - Node whose children are exported.
 * @param textTransformersIndex - Text format transformers used for text nodes.
 * @param textMatchTransformers - Text match transformers tried first on every child.
 * @param unclosedTags - Shared list of format tags that are opened but not yet
 *   closed; a new list is started when omitted.
 * @param unclosableTags - Tags that must not be closed at this nesting level;
 *   defaults to an empty list.
 * @returns The concatenated markdown of all children.
 */
function exportChildren(node, textTransformersIndex, textMatchTransformers, unclosedTags, unclosableTags) {
  // Unclosed tags are tracked from the very beginning of the export.
  const openTags = unclosedTags || [];
  const lockedTags = unclosableTags || [];
  // Callback for transformers that export nested children. The currently
  // unclosed tags become unclosable inside: nested tags from text match
  // transformers must not close the outer ones, as that may produce invalid
  // markdown. E.g. **text [text**](https://lexical.io) is invalid, because the
  // closing ** is inside the link.
  const exportNested = parentNode => exportChildren(parentNode, textTransformersIndex, textMatchTransformers, openTags, [...lockedTags, ...openTags]);
  const exportFormatted = (textNode, textContent) => exportTextFormat(textNode, textContent, textTransformersIndex, openTags, lockedTags);
  const exportViaTextMatch = child => {
    for (const transformer of textMatchTransformers) {
      if (!transformer.export) {
        continue;
      }
      const exported = transformer.export(child, exportNested, exportFormatted);
      if (exported != null) {
        return exported;
      }
    }
    return null;
  };
  const pieces = [];
  for (const child of node.getChildren()) {
    const matched = exportViaTextMatch(child);
    if (matched != null) {
      pieces.push(matched);
      continue;
    }
    if (lexical.$isLineBreakNode(child)) {
      pieces.push('\n');
    } else if (lexical.$isTextNode(child)) {
      pieces.push(exportFormatted(child, child.getTextContent()));
    } else if (lexical.$isElementNode(child)) {
      // An empty paragraph yields "".
      pieces.push(exportChildren(child, textTransformersIndex, textMatchTransformers, openTags, lockedTags));
    } else if (lexical.$isDecoratorNode(child)) {
      pieces.push(child.getTextContent());
    }
  }
  return pieces.join('');
}
/**
 * Renders the text of one text node as markdown, adding the opening and
 * closing tags required by the node's formats.
 *
 * @param node - Text node whose formats are applied.
 * @param textContent - Text to render for the node.
 * @param textTransformers - Text format transformers; each contributes its
 *   `tag` for the first entry of its `format` list.
 * @param unclosedTags - List of {format, tag} entries that were opened but not
 *   closed yet. This list is mutated: opened tags are pushed, closed tags popped.
 * @param unclosableTags - Tags that must stay open here; they are skipped when
 *   closing tags are emitted.
 * @returns Tags closed before the text, then opening tags, the (escaped) text
 *   and the tags closed after the text.
 */
function exportTextFormat(node, textContent, textTransformers,
// unclosed tags include the markdown tags that haven't been closed yet, and their associated formats
unclosedTags, unclosableTags) {
// This function handles the case of a string looking like this: " foo "
// Where it would be invalid markdown to generate: "** foo **"
// If the node has no format, we use the original text.
// Otherwise, leading and trailing whitespace characters are replaced by their
// numeric character references (&#<code point>;), so the formatted text no
// longer starts or ends with raw whitespace.
let output = node.getFormat() === 0 ? textContent : escapeLeadingAndTrailingWhitespaces(textContent);
if (!node.hasFormat('code')) {
// Escape any markdown characters in the text content
output = output.replace(/([*_`~\\])/g, '\\$1');
}
// the opening tags to be added to the result
let openingTags = '';
// the closing tags to be added to the result
let closingTagsBefore = '';
let closingTagsAfter = '';
// Neighbouring text nodes (null when the sibling is missing or not a text node)
const prevNode = getTextSibling(node, true);
const nextNode = getTextSibling(node, false);
const applied = new Set();
for (const transformer of textTransformers) {
const format = transformer.format[0];
const tag = transformer.tag;
// dedup applied formats
if (hasFormat(node, format) && !applied.has(format)) {
// Multiple tags might be used for the same format (*, _)
applied.add(format);
// append the tag to openingTags, if it's not applied to the previous nodes,
// or the nodes before that (which would result in an unclosed tag)
if (!hasFormat(prevNode, format) || !unclosedTags.find(element => element.tag === tag)) {
unclosedTags.push({
format,
tag
});
openingTags += tag;
}
}
}
// close any tags in the same order they were applied, if necessary
for (let i = 0; i < unclosedTags.length; i++) {
const nodeHasFormat = hasFormat(node, unclosedTags[i].format);
const nextNodeHasFormat = hasFormat(nextNode, unclosedTags[i].format);
// prevent adding closing tag if next sibling will do it
if (nodeHasFormat && nextNodeHasFormat) {
continue;
}
// Entry i is the first tag that has to be closed; every tag opened after it
// is closed as well, innermost (last opened) first.
const unhandledUnclosedTags = [...unclosedTags]; // Shallow copy to avoid modifying the original array
while (unhandledUnclosedTags.length > i) {
const unclosedTag = unhandledUnclosedTags.pop();
// If tag is unclosable, don't close it and leave it in the original array,
// So that it can be closed when it's no longer unclosable
if (unclosableTags && unclosedTag && unclosableTags.find(element => element.tag === unclosedTag.tag)) {
continue;
}
if (unclosedTag && typeof unclosedTag.tag === 'string') {
if (!nodeHasFormat) {
// Handles cases where the tag has not been closed before, e.g. if the previous node
// was a text match transformer that did not account for closing tags of the next node (e.g. a link)
closingTagsBefore += unclosedTag.tag;
} else if (!nextNodeHasFormat) {
closingTagsAfter += unclosedTag.tag;
}
}
// Mutate the original array to remove the closed tag
unclosedTags.pop();
}
// Everything from entry i onwards has been handled, so the scan stops here.
break;
}
output = openingTags + output + closingTagsAfter;
// Tags left open by earlier nodes are closed in front of this node's own markup.
return closingTagsBefore + output;
}
/**
 * Returns the adjacent sibling of a node if that sibling is a text node.
 *
 * @param node - Node whose sibling is looked up.
 * @param backward - true for the previous sibling, false for the next one.
 * @returns The sibling text node, or null when there is none.
 */
function getTextSibling(node, backward) {
  let sibling;
  if (backward) {
    sibling = node.getPreviousSibling();
  } else {
    sibling = node.getNextSibling();
  }
  return lexical.$isTextNode(sibling) ? sibling : null;
}
/**
 * Checks whether a node is a text node that carries the given format.
 *
 * @param node - Node to test; may be null.
 * @param format - Format name to look for.
 * @returns false for non-text nodes, otherwise the node's `hasFormat` result.
 */
function hasFormat(node, format) {
  if (!lexical.$isTextNode(node)) {
    return false;
  }
  return node.hasFormat(format);
}
/**
 * Replaces every leading and trailing whitespace character of a string with
 * its numeric character reference (`&#<code point>;`). Whitespace inside the
 * text is left untouched.
 *
 * @param textContent - Text to escape.
 * @returns The text with its whitespace edges encoded.
 */
function escapeLeadingAndTrailingWhitespaces(textContent) {
  const toCharacterReferences = run => Array.from(run, char => `&#${char.codePointAt(0)};`).join('');
  return textContent.replace(/^\s+|\s+$/g, run => toCharacterReferences(run));
}
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
/**
 * Finds the text format match (code span or emphasis) to apply first to a
 * text node.
 *
 * @param textNode - Text node whose content is scanned.
 * @param textFormatTransformersIndex - Index providing `fullMatchRegExpByTag`
 *   and `transformersByTag`.
 * @returns `{startIndex, endIndex, match, transformer}` where `match` is a
 *   regex-like array `[fullText, tag, content]` with `index` and `input`
 *   set, or null when nothing matches.
 */
function findOutermostTextFormatTransformer(textNode, textFormatTransformersIndex) {
  const textContent = textNode.getTextContent();
  const {
    fullMatchRegExpByTag,
    transformersByTag
  } = textFormatTransformersIndex;
  // Code spans are located first: emphasis delimiters inside them must not be
  // processed. Only code spans are treated this way; other inline elements
  // (e.g. links, raw HTML) are not excluded.
  const codeRegex = fullMatchRegExpByTag['`'];
  const codeTransformer = transformersByTag['`'];
  const excludeRanges = [];
  let codeMatch = null;
  if (codeRegex && codeTransformer) {
    for (const found of textContent.matchAll(new RegExp(codeRegex.source, 'g'))) {
      const start = found.index;
      const end = start + found[0].length;
      // Only the first code span is a candidate result; all spans are excluded
      // from delimiter scanning.
      if (!codeMatch) {
        codeMatch = {
          content: found[2],
          endIndex: end,
          startIndex: start,
          tag: '`'
        };
      }
      excludeRanges.push({
        end,
        start
      });
    }
  }
  const delimiters = scanDelimiters(textContent, textFormatTransformersIndex, excludeRanges);
  const emphasisMatch = delimiters.length > 0 ? processEmphasis(textContent, delimiters, textFormatTransformersIndex) : null;
  // Emphasis wins when it is the only match or when it fully wraps the code
  // span; otherwise the code span (if any) is used.
  const emphasisWins = Boolean(emphasisMatch) && (!codeMatch || emphasisMatch.startIndex <= codeMatch.startIndex && emphasisMatch.endIndex >= codeMatch.endIndex);
  const chosenMatch = emphasisWins ? emphasisMatch : codeMatch;
  if (!chosenMatch) {
    return null;
  }
  const chosenTransformer = emphasisWins ? transformersByTag[emphasisMatch.tag] : codeTransformer;
  if (!chosenTransformer) {
    return null;
  }
  const {
    content,
    endIndex,
    startIndex,
    tag
  } = chosenMatch;
  const regexMatch = [textContent.slice(startIndex, endIndex), tag, content];
  regexMatch.index = startIndex;
  regexMatch.input = textContent;
  return {
    endIndex,
    match: regexMatch,
    startIndex,
    transformer: chosenTransformer
  };
}
/**
 * Collects the runs of emphasis delimiter characters found in a text.
 *
 * Delimiter characters are the first characters of all registered tags except
 * the backtick. A run is skipped when its first character is preceded by an
 * odd number of backslashes or lies inside one of the excluded ranges.
 *
 * @param text - Text to scan.
 * @param transformersIndex - Index providing `transformersByTag`.
 * @param excludeRanges - `{start, end}` ranges (end exclusive) to ignore.
 * @returns Delimiter records `{active, canClose, canOpen, char, index, length,
 *   originalLength}` in text order; only runs that can open or close are kept.
 */
function scanDelimiters(text, transformersIndex, excludeRanges = []) {
  const delimiterChars = new Set();
  for (const tag of Object.keys(transformersIndex.transformersByTag)) {
    if (tag[0] !== '`') {
      delimiterChars.add(tag[0]);
    }
  }
  const isEscaped = position => {
    let cursor = position;
    while (cursor > 0 && text[cursor - 1] === '\\') {
      cursor--;
    }
    return (position - cursor) % 2 === 1;
  };
  const isInExcludedRange = position => {
    for (const range of excludeRanges) {
      if (position >= range.start && position < range.end) {
        return true;
      }
    }
    return false;
  };
  const delimiters = [];
  let position = 0;
  while (position < text.length) {
    const char = text[position];
    const startsRun = delimiterChars.has(char) && !isEscaped(position) && !isInExcludedRange(position);
    if (!startsRun) {
      position++;
      continue;
    }
    // Extend the run over all directly following identical characters.
    let runEnd = position + 1;
    while (runEnd < text.length && text[runEnd] === char) {
      runEnd++;
    }
    const runLength = runEnd - position;
    const canOpen = canEmphasis(char, text, position, runLength, true);
    const canClose = canEmphasis(char, text, position, runLength, false);
    if (canOpen || canClose) {
      delimiters.push({
        active: true,
        canClose,
        canOpen,
        char,
        index: position,
        length: runLength,
        originalLength: runLength
      });
    }
    position = runEnd;
  }
  return delimiters;
}
/**
 * Pairs opening and closing delimiter runs and returns one emphasis match.
 *
 * The delimiter records are mutated while pairing (`length`, `active`,
 * `index`).
 *
 * @param text - Text the delimiters were scanned from.
 * @param delimiters - Delimiter records as produced by scanDelimiters.
 * @param transformersIndex - Index providing `transformersByTag`; its keys are
 *   the available tags.
 * @returns `{content, endIndex, startIndex, tag}` for the pairing with the
 *   smallest start index (on equal start, the largest end index), or null
 *   when no pair is found.
 */
function processEmphasis(text, delimiters, transformersIndex) {
// Per closer kind (char + canOpen flag): index below which no opener was found before
const openersBottom = {};
let currentPos = 0;
let result = null;
while (currentPos < delimiters.length) {
const closer = delimiters[currentPos];
if (!closer.active || !closer.canClose || closer.length === 0) {
currentPos++;
continue;
}
const bottomKey = `${closer.char}${closer.canOpen}`;
const bottom = openersBottom[bottomKey] ?? -1;
let foundOpener = false;
// Walk backwards from the closer, looking for the nearest usable opener
for (let openIdx = currentPos - 1; openIdx > bottom; openIdx--) {
const opener = delimiters[openIdx];
if (!opener.active || !opener.canOpen || opener.length === 0 || opener.char !== closer.char) {
continue;
}
// Rule of 3: when either run can both open and close, the pair is rejected
// if the original lengths sum to a multiple of 3 while neither length is one
if (opener.canClose || closer.canOpen) {
const sum = opener.originalLength + closer.originalLength;
if (sum % 3 === 0 && opener.originalLength % 3 !== 0 && closer.originalLength % 3 !== 0) {
continue;
}
}
// Longest registered tag made of this character that fits into both runs
const maxLen = Math.min(opener.length, closer.length);
const matchedTag = Object.keys(transformersIndex.transformersByTag).filter(t => t[0] === opener.char && t.length <= maxLen).sort((a, b) => b.length - a.length)[0];
if (!matchedTag) {
continue;
}
foundOpener = true;
const matchLen = matchedTag.length;
// The tag is taken from the inner side of the opener run and from the start
// of the closer run; content spans everything between the two runs
const match = {
content: text.slice(opener.index + opener.length, closer.index),
endIndex: closer.index + matchLen,
startIndex: opener.index + (opener.length - matchLen),
tag: matchedTag
};
// Keep the earliest-starting match; on a tie, the one that ends later
if (!result || match.startIndex < result.startIndex || match.startIndex === result.startIndex && match.endIndex > result.endIndex) {
result = match;
}
// Delimiters between opener and closer can no longer take part in a pairing
for (let j = openIdx + 1; j < currentPos; j++) {
delimiters[j].active = false;
}
// Consume the matched characters from both runs
opener.length -= matchLen;
closer.length -= matchLen;
opener.active = opener.length > 0;
if (closer.length > 0) {
// Leftover closer characters are examined again in the next iteration
closer.index += matchLen;
} else {
closer.active = false;
currentPos++;
}
break;
}
if (!foundOpener) {
// Remember that nothing below this position can open for this closer kind
openersBottom[bottomKey] = currentPos - 1;
if (!closer.canOpen) {
closer.active = false;
}
currentPos++;
}
}
return result;
}
/**
 * Decides whether a delimiter run may open or close emphasis.
 *
 * The run must be flanking in the requested direction. For `_` it must in
 * addition either not be flanking in the opposite direction, or have a
 * punctuation character on its outer side.
 *
 * @param char - Delimiter character of the run.
 * @param text - Text containing the run.
 * @param index - Start index of the run.
 * @param length - Length of the run.
 * @param isOpen - true to test opening, false to test closing.
 * @returns Whether the run can act in the requested role.
 */
function canEmphasis(char, text, index, length, isOpen) {
  if (!isFlanking(text, index, length, isOpen)) {
    return false;
  }
  if (char !== '_') {
    return true;
  }
  if (!isFlanking(text, index, length, !isOpen)) {
    return true;
  }
  const adjacentChar = text[isOpen ? index - 1 : index + length];
  return adjacentChar !== undefined && PUNCTUATION.test(adjacentChar);
}
/**
 * Tests whether a delimiter run is left- or right-flanking.
 *
 * For a left-flanking test the inner character is the one after the run and
 * the outer character the one before it; for right-flanking it is the other
 * way round. The run is flanking when the inner character exists and is not
 * whitespace, and is either not punctuation or the outer character is
 * missing, whitespace or punctuation.
 *
 * @param text - Text containing the run.
 * @param index - Start index of the run.
 * @param length - Length of the run.
 * @param isLeft - true for left-flanking, false for right-flanking.
 * @returns Whether the run is flanking in the given direction.
 */
function isFlanking(text, index, length, isLeft) {
  const before = text[index - 1];
  const after = text[index + length];
  const inner = isLeft ? after : before;
  const outer = isLeft ? before : after;
  if (inner === undefined || WHITESPACE.test(inner)) {
    return false;
  }
  if (!PUNCTUATION.test(inner)) {
    return true;
  }
  return outer === undefined || WHITESPACE.test(outer) || PUNCTUATION.test(outer);
}
/**
 * Applies a found text format match to a text node.
 *
 * @param textNode - Text node containing the match.
 * @param startIndex - Start offset of the match in the node's text.
 * @param endIndex - End offset of the match in the node's text.
 * @param transformer - Text format transformer whose formats are applied; may be falsy.
 * @param match - Match array; `match[0]` is the full matched text and
 *   `match[2]` the content without tags.
 * @returns `{nodeBefore, transformedNode, nodeAfter}`; the outer nodes are
 *   undefined when the node was not split on that side.
 */
function importTextFormatTransformer(textNode, startIndex, endIndex, transformer, match) {
  let nodeBefore;
  let transformedNode;
  let nodeAfter;
  if (match[0] === textNode.getTextContent()) {
    // The match covers the whole node, so no split is needed and the node is
    // reused. E.g. for **_Hello_**, after bold (**) is applied the same text
    // node is reused to apply italic (_).
    transformedNode = textNode;
  } else if (startIndex === 0) {
    [transformedNode, nodeAfter] = textNode.splitText(endIndex);
  } else {
    [nodeBefore, transformedNode, nodeAfter] = textNode.splitText(startIndex, endIndex);
  }
  transformedNode.setTextContent(match[2]);
  const formats = transformer ? transformer.format : [];
  for (const format of formats) {
    if (!transformedNode.hasFormat(format)) {
      transformedNode.toggleFormat(format);
    }
  }
  return {
    nodeAfter,
    nodeBefore,
    transformedNode
  };
}
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
/**
 * Picks the text match transformer whose match should be applied first.
 *
 * Transformers without `replace` or `importRegExp` are ignored, as are
 * transformers whose `getEndIndex` returns false. A later candidate replaces
 * the current one only if it starts earlier and either wraps it or ends
 * before it starts.
 *
 * @param textNode_ - Text node whose content is matched.
 * @param textMatchTransformers - Candidate transformers, in priority order.
 * @returns `{startIndex, endIndex, match, transformer}` or null when no
 *   transformer matches.
 */
function findOutermostTextMatchTransformer(textNode_, textMatchTransformers) {
  const textNode = textNode_;
  let best = null;
  for (const transformer of textMatchTransformers) {
    if (!transformer.replace || !transformer.importRegExp) {
      continue;
    }
    const match = textNode.getTextContent().match(transformer.importRegExp);
    if (!match) {
      continue;
    }
    const startIndex = match.index || 0;
    let endIndex;
    if (transformer.getEndIndex) {
      endIndex = transformer.getEndIndex(textNode, match);
    } else {
      endIndex = startIndex + match[0].length;
    }
    if (endIndex === false) {
      continue;
    }
    const isFirstUsable = best === null || best.endIndex === undefined;
    // Wraps the previous match or lies strictly before it.
    const isOuterOrEarlier = !isFirstUsable && startIndex < best.startIndex && (endIndex > best.endIndex || endIndex <= best.startIndex);
    if (isFirstUsable || isOuterOrEarlier) {
      best = {
        endIndex,
        match,
        startIndex,
        transformer
      };
    }
  }
  if (best === null || best.endIndex === undefined) {
    return null;
  }
  return best;
}
/**
 * Applies a found text match transformer to the matched part of a text node.
 *
 * @param textNode - Text node containing the match.
 * @param startIndex - Start offset of the match in the node's text.
 * @param endIndex - End offset of the match in the node's text.
 * @param transformer - Text match transformer; its `replace` is called with
 *   the split-off node and the match.
 * @param match - Match passed on to `transformer.replace`.
 * @returns null when the transformer has no `replace`; otherwise
 *   `{nodeBefore, transformedNode, nodeAfter}`, where `transformedNode` is the
 *   value returned by `replace` (undefined when that value is falsy).
 */
function importFoundTextMatchTransformer(textNode, startIndex, endIndex, transformer, match) {
  // Check before splitting: without a replace function nothing is transformed,
  // so the text node must be left as it is.
  if (!transformer.replace) {
    return null;
  }
  let transformedNode, nodeAfter, nodeBefore;
  if (startIndex === 0) {
    [transformedNode, nodeAfter] = textNode.splitText(endIndex);
  } else {
    [nodeBefore, transformedNode, nodeAfter] = textNode.splitText(startIndex, endIndex);
  }
  const potentialTransformedNode = transformer.replace(transformedNode, match);
  return {
    nodeAfter,
    nodeBefore,
    transformedNode: potentialTransformedNode || undefined
  };
}
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
/**
* Returns true if the node can contain transformable markdown.
* Code nodes cannot contain transformable markdown.
* For example, `code **bold**` should not be transformed to
* <code>code <strong>bold</strong></code>.
*/
function canContainTransformableMarkdown(node) {
  // Only text nodes are candidates at all.
  if (!lexical.$isTextNode(node)) {
    return false;
  }
  // Text that already carries the code format is left untouched.
  return !node.hasFormat('code');
}
/**
 * Applies text format and text match transformers to a text node.
 *
 * It finds the outermost text format or text match and applies it, then calls
 * itself on the resulting nodes (after, before, transformed) until no
 * transformer applies any more. Finally the markdown escapes in the node's
 * text are resolved: a backslash before `*`, `_`, a backtick, `~` or `\` is
 * dropped, and decimal character references (`&#<digits>;`) are decoded.
 *
 * @param textNode - Text node to transform.
 * @param textFormatTransformersIndex - Index of the text format transformers.
 * @param textMatchTransformers - Text match transformers.
 */
function importTextTransformers(textNode, textFormatTransformersIndex, textMatchTransformers) {
  let foundTextFormat = findOutermostTextFormatTransformer(textNode, textFormatTransformersIndex);
  let foundTextMatch = findOutermostTextMatchTransformer(textNode, textMatchTransformers);
  if (foundTextFormat && foundTextMatch) {
    // Find the outermost transformer
    if (foundTextFormat.startIndex <= foundTextMatch.startIndex && foundTextFormat.endIndex >= foundTextMatch.endIndex ||
    // foundTextMatch starts after foundTextFormat ends
    foundTextMatch.startIndex > foundTextFormat.endIndex) {
      // Apply foundTextFormat first by dropping foundTextMatch
      foundTextMatch = null;
    } else {
      // Apply foundTextMatch first by dropping foundTextFormat
      foundTextFormat = null;
    }
  }
  let result = null;
  if (foundTextFormat) {
    result = importTextFormatTransformer(textNode, foundTextFormat.startIndex, foundTextFormat.endIndex, foundTextFormat.transformer, foundTextFormat.match);
  } else if (foundTextMatch) {
    result = importFoundTextMatchTransformer(textNode, foundTextMatch.startIndex, foundTextMatch.endIndex, foundTextMatch.transformer, foundTextMatch.match);
    if (!result) {
      return;
    }
  }
  if (result) {
    for (const part of [result.nodeAfter, result.nodeBefore, result.transformedNode]) {
      if (canContainTransformableMarkdown(part)) {
        importTextTransformers(part, textFormatTransformersIndex, textMatchTransformers);
      }
    }
  }
  // Handle escape characters
  const textContent = textNode.getTextContent();
  const escapedText = textContent.replace(/\\([*_`~\\])/g, '$1').replace(/&#(\d+);/g, (reference, digits) => {
    const codePoint = Number(digits);
    // String.fromCodePoint throws a RangeError above U+10FFFF. Such sequences
    // are kept as written instead of aborting the whole import.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : reference;
  });
  textNode.setTextContent(escapedText);
}
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
/**
 * Builds a function that renders Lexical nodes from a markdown string.
 *
 * @param transformers - Transformers to use; they are grouped by type first.
 * @param shouldPreserveNewLines - When false (default), empty paragraphs are
 *   removed after the import, as long as the root keeps more than one child.
 * @returns A function `(markdownString, node)` that clears `node` (or the
 *   root when omitted) and fills it with the imported content. If a selection
 *   exists afterwards, it is moved to the start.
 */
function createMarkdownImport(transformers, shouldPreserveNewLines = false) {
  const byType = transformersByType(transformers);
  const textFormatTransformersIndex = createTextFormatTransformersIndex(byType.textFormat);
  return (markdownString, node) => {
    // Accept both LF and CRLF line endings. A carriage return left at the end
    // of a line would keep `$`-anchored patterns (e.g. a closing code fence)
    // from matching.
    const lines = markdownString.split(/\r?\n/);
    const linesLength = lines.length;
    const root = node || lexical.$getRoot();
    root.clear();
    for (let i = 0; i < linesLength; i++) {
      const lineText = lines[i];
      const [imported, shiftedIndex] = $importMultiline(lines, i, byType.multilineElement, root);
      if (imported) {
        // The lines consumed by a multiline element are not processed again.
        // Nested markdown inside such an element is the responsibility of the
        // matched multiline transformer.
        i = shiftedIndex; // Next loop will start from the line after the last line of the multiline element
        continue;
      }
      $importBlocks(lineText, root, byType.element, textFormatTransformersIndex, byType.textMatch, shouldPreserveNewLines);
    }
    // By default, removing empty paragraphs as md does not really
    // allow empty lines and uses them as delimiter.
    // If you need empty lines set shouldPreserveNewLines = true.
    const children = root.getChildren();
    for (const child of children) {
      if (!shouldPreserveNewLines && isEmptyParagraph(child) && root.getChildrenSize() > 1) {
        child.remove();
      }
    }
    if (lexical.$getSelection() !== null) {
      root.selectStart();
    }
  };
}
/**
 * Tries to import a multiline element that starts at the given line.
 *
 * Each transformer whose `regExpStart` matches the start line is tried in
 * order. A transformer may take over completely via
 * `handleImportAfterStartMatch`; otherwise the closing line is searched with
 * `regExpEnd` and `replace` is called with the lines in between.
 *
 * @param lines - All lines of the markdown document.
 * @param startLineIndex - Index of the line to start at.
 * @param multilineElementTransformers - Multiline element transformers.
 * @param rootNode - Node passed to the transformers as import target.
 * @returns A tuple: the first element is a boolean indicating if a multiline
 *   element was imported, the second is the index of the last line that was
 *   processed.
 */
function $importMultiline(lines, startLineIndex, multilineElementTransformers, rootNode) {
  for (const transformer of multilineElementTransformers) {
    const {
      handleImportAfterStartMatch,
      regExpEnd,
      regExpStart,
      replace
    } = transformer;
    const startMatch = lines[startLineIndex].match(regExpStart);
    if (!startMatch) {
      continue; // Try next transformer
    }
    if (handleImportAfterStartMatch) {
      const result = handleImportAfterStartMatch({
        lines,
        rootNode,
        startLineIndex,
        startMatch,
        transformer
      });
      if (result === null) {
        continue;
      } else if (result) {
        return result;
      }
    }
    // `regExpEnd` is either a RegExp, an object {regExp, optional}, or absent.
    // `typeof null` is 'object' too, so null is excluded explicitly before `in`.
    const isEndObject = regExpEnd !== null && typeof regExpEnd === 'object';
    const regexpEndRegex = isEndObject && 'regExp' in regExpEnd ? regExpEnd.regExp : regExpEnd;
    const isEndOptional = isEndObject && 'optional' in regExpEnd ? regExpEnd.optional : !regExpEnd;
    let endLineIndex = startLineIndex;
    const linesLength = lines.length;
    // check every single line for the closing match. It could also be on the same line as the opening match.
    while (endLineIndex < linesLength) {
      const endMatch = regexpEndRegex ? lines[endLineIndex].match(regexpEndRegex) : null;
      if (!endMatch) {
        // A required end keeps searching; an optional end keeps searching until
        // the last line of the document is reached.
        if (!isEndOptional || isEndOptional && endLineIndex < linesLength - 1) {
          endLineIndex++;
          continue; // Search next line for closing match
        }
      }
      // Now, check if the closing match matched is the same as the opening match.
      // If it is, we need to continue searching for the actual closing match.
      if (endMatch && startLineIndex === endLineIndex && endMatch.index === startMatch.index) {
        endLineIndex++;
        continue; // Search next line for closing match
      }
      // At this point, we have found the closing match. Next: calculate the lines in between open and closing match
      // This should not include the matches themselves, and be split up by lines
      const linesInBetween = [];
      if (endMatch && startLineIndex === endLineIndex) {
        linesInBetween.push(lines[startLineIndex].slice(startMatch[0].length, -endMatch[0].length));
      } else {
        for (let i = startLineIndex; i <= endLineIndex; i++) {
          if (i === startLineIndex) {
            const text = lines[i].slice(startMatch[0].length);
            linesInBetween.push(text); // Also include empty text
          } else if (i === endLineIndex && endMatch) {
            const text = lines[i].slice(0, -endMatch[0].length);
            linesInBetween.push(text); // Also include empty text
          } else {
            linesInBetween.push(lines[i]);
          }
        }
      }
      if (replace(rootNode, null, startMatch, endMatch, linesInBetween, true) !== false) {
        // Return here. This $importMultiline function is run line by line and should only process a single multiline element at a time.
        return [true, endLineIndex];
      }
      // The replace function returned false, despite finding the matching open and close tags => this transformer does not want to handle it.
      // Thus, we continue letting the remaining transformers handle the passed lines of text from the beginning
      break;
    }
  }
  // No multiline transformer handled this line successfully
  return [false, startLineIndex];
}
/**
 * Imports a single markdown line as a block.
 *
 * The line is first appended to the root as a paragraph with one text node.
 * Element transformers are then tried in order; the first whose `regExp`
 * matches and whose `replace` does not return false handles the line. After
 * that the text transformers are applied to the text node. If the paragraph is
 * still attached and the line is not empty, its content may be merged into the
 * preceding paragraph, quote or list item (see below).
 *
 * @param lineText - Markdown line to import.
 * @param rootNode - Node the new block is appended to.
 * @param elementTransformers - Element transformers to try.
 * @param textFormatTransformersIndex - Index of the text format transformers.
 * @param textMatchTransformers - Text match transformers.
 * @param shouldPreserveNewLines - When true, the line is never merged into the
 *   previous block.
 */
function $importBlocks(lineText, rootNode, elementTransformers, textFormatTransformersIndex, textMatchTransformers, shouldPreserveNewLines) {
  const textNode = lexical.$createTextNode(lineText);
  const elementNode = lexical.$createParagraphNode();
  elementNode.append(textNode);
  rootNode.append(elementNode);
  for (const {
    regExp,
    replace
  } of elementTransformers) {
    const match = lineText.match(regExp);
    if (!match) {
      continue;
    }
    // Hand the transformer the text without the matched markup prefix.
    textNode.setTextContent(lineText.slice(match[0].length));
    if (replace(elementNode, [textNode], match, true) !== false) {
      break;
    }
    // The transformer declined the line. Restore the full text so the prefix
    // is not lost when no other transformer handles the line.
    textNode.setTextContent(lineText);
  }
  importTextTransformers(textNode, textFormatTransformersIndex, textMatchTransformers);
  // If no transformer found and we left with original paragraph node
  // can check if its content can be appended to the previous node
  // if it's a paragraph, quote or list
  if (elementNode.isAttached() && lineText.length > 0) {
    const previousNode = elementNode.getPreviousSibling();
    if (!shouldPreserveNewLines && (
    // Only append if we're not preserving newlines
    lexical.$isParagraphNode(previousNode) || richText.$isQuoteNode(previousNode) || list.$isListNode(previousNode))) {
      let targetNode = previousNode;
      if (list.$isListNode(previousNode)) {
        // For lists, the line continues the list item holding the last descendant.
        const lastDescendant = previousNode.getLastDescendant();
        if (lastDescendant == null) {
          targetNode = null;
        } else {
          targetNode = utils.$findMatchingParent(lastDescendant, list.$isListItemNode);
        }
      }
      // Only merge into a block that already has text; the line is joined with a line break.
      if (targetNode != null && targetNode.getTextContentSize() > 0) {
        targetNode.splice(targetNode.getChildrenSize(), 0, [lexical.$createLineBreakNode(), ...elementNode.getChildren()]);
        elementNode.remove();
      }
    }
  }
}
/**
 * Builds lookup structures for a list of text format transformers.
 *
 * @param textTransformers - Text format transformers, each with a `tag`.
 * @returns An object with
 *   - `fullMatchRegExpByTag`: per tag, a regular expression matching open tag
 *     + content + close tag,
 *   - `openTagsRegExp`: a global regular expression matching any of the tags
 *     (alternatives in transformer order) when not preceded by a backslash,
 *   - `transformersByTag`: per tag, the transformer registered for it (the
 *     last one wins when a tag occurs more than once).
 */
function createTextFormatTransformersIndex(textTransformers) {
  const notEscaped = `(?<![\\\\])`;
  // Returns the pattern source for "open tag + content + close tag".
  const fullMatchSource = (tag, tagRegExp) => {
    if (tag.length !== 1) {
      // Multi-char tags (e.g. "**")
      return `(?<!\\\\)(${tagRegExp})((\\\\${tagRegExp})?.*?[^\\s](\\\\${tagRegExp})?)((?<!\\\\)|(?<=\\\\\\\\))(${tagRegExp})(?!\\\\)`;
    }
    if (tag === '`') {
      // Backticks get their own pattern: the content may contain escaped backticks.
      return `(?<![\\\\\`])(\`)((?:\\\\\`|[^\`])+?)(\`)(?!\`)`;
    }
    // Other single-char tags (e.g. "*")
    return `(?<![\\\\${tagRegExp}])(${tagRegExp})((\\\\${tagRegExp})?.*?[^${tagRegExp}\\s](\\\\${tagRegExp})?)((?<!\\\\)|(?<=\\\\\\\\))(${tagRegExp})(?![\\\\${tagRegExp}])`;
  };
  const transformersByTag = {};
  const fullMatchRegExpByTag = {};
  const openTagSources = [];
  for (const transformer of textTransformers) {
    const tag = transformer.tag;
    // Escape the characters *, ^ and + so the tag can be embedded in a pattern.
    const tagRegExp = tag.replace(/(\*|\^|\+)/g, '\\$1');
    transformersByTag[tag] = transformer;
    openTagSources.push(tagRegExp);
    fullMatchRegExpByTag[tag] = new RegExp(fullMatchSource(tag, tagRegExp));
  }
  const openTagsRegExp = new RegExp(`${notEscaped}(${openTagSources.join('|')})`, 'g');
  return {
    fullMatchRegExpByTag,
    openTagsRegExp,
    transformersByTag
  };
}
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
// Do not require this module directly! Use normal `invariant` calls.
/**
 * Throws an Error carrying the given message.
 *
 * @param message - Message for the thrown error.
 * @throws {Error} Always.
 */
function formatDevErrorMessage(message) {
  const error = new Error(message);
  throw error;
}
/**
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*
*/
// Ordered list item: leading whitespace (group 1), one or more digits (group 2), ".", one whitespace character.
const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/;
// Unordered list item: leading whitespace (group 1), one of "-", "*", "+", one whitespace character.
const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/;
// Check list item: leading whitespace (group 1), an optional bullet, the box "[ ]", "[x]" or "[]"
// (group 2, with its content in group 3), one whitespace character. Case-insensitive, so "[X]" matches too.
const CHECK_LIST_REGEX = /^(\s*)(?:[-*+]\s)?\s?(\[(\s|x)?\])\s/i;
// Heading: one to six "#" (group 1) followed by one whitespace character.
const HEADING_REGEX = /^(#{1,6})\s/;
// Quote: ">" followed by one whitespace character.
const QUOTE_REGEX = /^>\s/;
// Code fence start: optional indentation plus three or more backticks (group 1),
// an optional language name of word characters and "-" (group 2), an optional space or tab.
const CODE_START_REGEX = /^([ \t]*`{3,})([\w-]+)?[ \t]?/;
// Code fence end: a line holding only optional indentation and three or more backticks.
const CODE_END_REGEX = /^[ \t]*`{3,}$/;
// Code block opened and closed with ``` on one and the same line.
const CODE_SINGLE_LINE_REGEX = /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/;
// Table row: starts with "|" and ends with "|" (one trailing whitespace character allowed); group 1 is the part in between.
const TABLE_ROW_REG_EXP = /^(?:\|)(.+)(?:\|)\s?$/;
// Table divider row: cells made of dashes with optional ":" markers and single spaces, e.g. "| --- | :-: |".
const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/;
// Opening (or self-closing) tag at the start of the text, e.g. "<div class='x'>" or "<br/>".
const TAG_START_REGEX = /^<[a-z_][\w-]*(?:\s[^<>]*)?\/?>/i;
// Closing tag at the start of the text, e.g. "</div>".
const TAG_END_REGEX = /^<\/[a-z_][\w-]*\s*>/i;
// Wraps a pattern so that it must additionally end at the end of the input; flags are kept.
const ENDS_WITH = regex => new RegExp(`(?:${regex.source})$`, regex.flags);
// State under the key 'mdListMarker': parsing accepts "-", "*" or "+" and yields "-" for anything else.
const listMarkerState = lexical.createState('mdListMarker', {
parse: v => typeof v === 'string' && /^[-*+]$/.test(v) ? v : '-'
});
// State under the key 'mdCodeFence': parsing accepts a string of three or more backticks and yields "```" for anything else.
const codeFenceState = lexical.createState('mdCodeFence', {
parse: val => {
if (typeof val === 'string' && /^`{3,}$/.test(val)) {
return val;
}
return '```';
}
});
/**
 * Creates a `replace` handler that swaps a parent node for a new block node.
 *
 * @param createNode - Factory receiving the regex match and returning the new node.
 * @returns A handler `(parentNode, children, match, isImport)` that appends
 *   the children to the new node, replaces `parentNode` with it and, unless
 *   importing, places the selection at the start of the new node.
 */
const createBlockNode = createNode => (parentNode, children, match, isImport) => {
  const blockNode = createNode(match);
  blockNode.append(...children);
  parentNode.replace(blockNode);
  if (isImport) {
    return;
  }
  blockNode.select(0, 0);
};
// Number of spaces that make up one indentation level
// TODO: should be an option
const LIST_INDENT_SIZE = 4;
/**
 * Computes the indentation level described by a whitespace string.
 *
 * Every tab counts as one level; all spaces together count as one level per
 * full group of LIST_INDENT_SIZE.
 *
 * @param whitespaces - The whitespace prefix of a line.
 * @returns The indentation level.
 */
function getIndent(whitespaces) {
  const countMatches = pattern => (whitespaces.match(pattern) || []).length;
  const tabLevels = countMatches(/\t/g);
  const spaceLevels = Math.floor(countMatches(/ /g) / LIST_INDENT_SIZE);
  return tabLevels + spaceLevels;
}
/**
 * Creates the `replace` callback for a list transformer of the given type.
 *
 * The callback turns the matched paragraph into a list item and either
 * - prepends it to a following list of the same type,
 * - appends it to a preceding list of the same type, or
 * - wraps it in a newly created list that replaces the paragraph.
 *
 * For bullet and check lists the marker character that was typed is stored
 * on the list node through `listMarkerState`. The indentation level is
 * derived from the leading whitespace captured in `match[1]`.
 *
 * @param {string} listType - `'bullet'`, `'number'` or `'check'`.
 * @returns {Function} `(parentNode, children, match, isImport) => void`.
 */
const listReplace = listType => {
  return (parentNode, children, match, isImport) => {
    const previousNode = parentNode.getPreviousSibling();
    const nextNode = parentNode.getNextSibling();
    // CHECK_LIST_REGEX is case-insensitive, so the captured mark can be `x` or
    // `X`; both mean "checked". It is undefined for an empty `[]`.
    const checkMark = match[3];
    const isChecked = listType === 'check' ? typeof checkMark === 'string' && checkMark.toLowerCase() === 'x' : undefined;
    const listItem = list.$createListItemNode(isChecked);
    const firstMatchChar = match[0].trim()[0];
    // Only keep the first character when it is a valid bullet marker (the
    // state parser returns it unchanged in that case).
    const listMarker = (listType === 'bullet' || listType === 'check') && firstMatchChar === listMarkerState.parse(firstMatchChar) ? firstMatchChar : undefined;
    if (list.$isListNode(nextNode) && nextNode.getListType() === listType) {
      if (listMarker) {
        lexical.$setState(nextNode, listMarkerState, listMarker);
      }
      const firstChild = nextNode.getFirstChild();
      if (firstChild !== null) {
        firstChild.insertBefore(listItem);
      } else {
        // should never happen, but let's handle gracefully, just in case.
        nextNode.append(listItem);
      }
      parentNode.remove();
    } else if (list.$isListNode(previousNode) && previousNode.getListType() === listType) {
      if (listMarker) {
        lexical.$setState(previousNode, listMarkerState, listMarker);
      }
      previousNode.append(listItem);
      parentNode.remove();
    } else {
      // No adjacent list of the same type: start a new one. Numbered lists
      // start at the number that was written.
      const list$1 = list.$createListNode(listType, listType === 'number' ? Number(match[2]) : undefined);
      if (listMarker) {
        lexical.$setState(list$1, listMarkerState, listMarker);
      }
      list$1.append(listItem);
      parentNode.replace(list$1);
    }
    listItem.append(...children);
    if (!isImport) {
      listItem.select(0, 0);
    }
    const indent = getIndent(match[1]);
    if (indent) {
      listItem.setIndent(indent);
    }
  };
};
/**
 * Serialises a list node to markdown, one line per list item.
 *
 * A list item whose only child is another list is treated as a nesting
 * wrapper: the nested list is exported one level deeper and the wrapper does
 * not consume a number. Children that are not list items are skipped.
 *
 * @param listNode - The list to export.
 * @param {Function} exportChildren - Renders the content of a list item.
 * @param {number} depth - Nesting depth; each level indents by
 *   `LIST_INDENT_SIZE` spaces.
 * @returns {string} The markdown lines joined with `\n`.
 */
const $listExport = (listNode, exportChildren, depth) => {
  const renderedLines = [];
  let itemNumber = 0;
  listNode.getChildren().forEach(child => {
    if (!list.$isListItemNode(child)) {
      return;
    }
    if (child.getChildrenSize() === 1) {
      const onlyChild = child.getFirstChild();
      if (list.$isListNode(onlyChild)) {
        renderedLines.push($listExport(onlyChild, exportChildren, depth + 1));
        return;
      }
    }
    const padding = ' '.repeat(depth * LIST_INDENT_SIZE);
    const kind = listNode.getListType();
    const marker = lexical.$getState(listNode, listMarkerState);
    let prefix;
    if (kind === 'number') {
      prefix = String(listNode.getStart() + itemNumber) + '. ';
    } else if (kind === 'check') {
      prefix = marker + ' [' + (child.getChecked() ? 'x' : ' ') + '] ';
    } else {
      prefix = marker + ' ';
    }
    renderedLines.push(padding + prefix + exportChildren(child));
    itemNumber++;
  });
  return renderedLines.join('\n');
};
/**
 * Element transformer for headings (`#` through `######`).
 *
 * Export writes one `#` per heading level followed by the heading content;
 * import/shortcut replaces the paragraph with a heading node whose level is
 * the number of `#` characters matched.
 */
const HEADING = {
  dependencies: [richText.HeadingNode],
  export: (node, exportChildren) => {
    if (richText.$isHeadingNode(node)) {
      // The tag is `h1`..`h6`; the digit after the `h` is the level.
      const hashes = '#'.repeat(Number(node.getTag().slice(1)));
      return hashes + ' ' + exportChildren(node);
    }
    return null;
  },
  regExp: HEADING_REGEX,
  replace: createBlockNode(match => richText.$createHeadingNode('h' + match[1].length)),
  type: 'element'
};
/**
 * Element transformer for block quotes (`> text`).
 *
 * Export prefixes every line of the quote content with `> `. During import a
 * quote line that directly follows an existing quote node is merged into it
 * after a line break; otherwise the paragraph is replaced by a new quote
 * node.
 */
const QUOTE = {
  dependencies: [richText.QuoteNode],
  export: (node, exportChildren) => {
    if (!richText.$isQuoteNode(node)) {
      return null;
    }
    return exportChildren(node).split('\n').map(line => '> ' + line).join('\n');
  },
  regExp: QUOTE_REGEX,
  replace: (parentNode, children, _match, isImport) => {
    const previousNode = isImport ? parentNode.getPreviousSibling() : null;
    if (isImport && richText.$isQuoteNode(previousNode)) {
      // Continue the previous quote instead of starting a second one.
      const insertAt = previousNode.getChildrenSize();
      const appended = [lexical.$createLineBreakNode(), ...children];
      previousNode.splice(insertAt, 0, appended);
      parentNode.remove();
      return;
    }
    const quoteNode = richText.$createQuoteNode();
    quoteNode.append(...children);
    parentNode.replace(quoteNode);
    if (isImport) {
      return;
    }
    quoteNode.select(0, 0);
  },
  type: 'element'
};
/**
 * Multiline element transformer for fenced code blocks.
 *
 * - `export` writes the code between two fences, lengthening the fence when
 *   the code itself contains the stored fence.
 * - `handleImportAfterStartMatch` consumes the lines belonging to a block
 *   once an opening fence has been matched and reports the last consumed
 *   line index.
 * - `replace` builds the code node, either from raw lines (import) or from
 *   existing child nodes (typing shortcut).
 */
const CODE = {
  dependencies: [code.CodeNode],
  export: node => {
    if (!code.$isCodeNode(node)) {
      return null;
    }
    const body = node.getTextContent();
    let fence = lexical.$getState(node, codeFenceState);
    if (body.indexOf(fence) !== -1) {
      // The code contains the fence itself: use one backtick more than the
      // longest backtick run found in the code.
      const backtickRuns = body.match(/`{3,}/g);
      if (backtickRuns) {
        const longestRun = backtickRuns.reduce((longest, run) => Math.max(longest, run.length), 0);
        fence = '`'.repeat(longestRun + 1);
      }
    }
    const language = node.getLanguage() || '';
    const content = body ? '\n' + body : '';
    return fence + language + content + '\n' + fence;
  },
  handleImportAfterStartMatch: ({
    lines,
    rootNode,
    startLineIndex,
    startMatch
  }) => {
    const openingFence = startMatch[1];
    const fenceLength = openingFence.trim().length;
    const startLine = lines[startLineIndex];
    const restOfStartLine = startLine.slice(startMatch.index + openingFence.length);

    // Case 1: the block is closed on the opening line itself.
    const inlineClose = restOfStartLine.match(new RegExp(`\`{${fenceLength},}$`));
    if (inlineClose) {
      const inlineContent = restOfStartLine.slice(0, restOfStartLine.lastIndexOf(inlineClose[0]));
      // What the start regex captured as a language is part of the content
      // here, so the language slot is blanked out.
      const startMatchWithoutLanguage = [...startMatch];
      startMatchWithoutLanguage[2] = '';
      CODE.replace(rootNode, null, startMatchWithoutLanguage, inlineClose, [inlineContent], true);
      return [true, startLineIndex];
    }

    // Lines after the opening line up to (excluding) `endIndex`, preceded by
    // whatever follows the full start match on the opening line.
    const collectLines = endIndex => {
      const collected = lines.slice(startLineIndex + 1, endIndex);
      const afterStartMatch = startLine.slice(startMatch[0].length);
      if (afterStartMatch.length > 0) {
        collected.unshift(afterStartMatch);
      }
      return collected;
    };

    // Case 2: a later line is a closing fence at least as long as the opener.
    const closingFenceRegex = new RegExp(`^[ \\t]*\`{${fenceLength},}$`);
    for (let lineIndex = startLineIndex + 1; lineIndex < lines.length; lineIndex++) {
      const closeMatch = lines[lineIndex].match(closingFenceRegex);
      if (closeMatch) {
        CODE.replace(rootNode, null, startMatch, closeMatch, collectLines(lineIndex), true);
        return [true, lineIndex];
      }
    }

    // Case 3: never closed, so everything up to the end of the input is code.
    CODE.replace(rootNode, null, startMatch, null, collectLines(lines.length), true);
    return [true, lines.length - 1];
  },
  regExpEnd: {
    optional: true,
    regExp: CODE_END_REGEX
  },
  regExpStart: CODE_START_REGEX,
  replace: (rootNode, children, startMatch, endMatch, linesInBetween, isImport) => {
    const fence = startMatch[1] ? startMatch[1].trim() : '```';
    const language = startMatch[2] || undefined;
    if (children) {
      // Existing child nodes are moved into a new code node.
      const replaceWithCodeNode = createBlockNode(match => {
        return code.$createCodeNode(match ? match[2] : undefined);
      });
      replaceWithCodeNode(rootNode, children, startMatch, isImport);
      return;
    }
    if (!linesInBetween) {
      return;
    }
    const codeBlockNode = code.$createCodeNode(language);
    let codeText;
    if (linesInBetween.length === 1) {
      const onlyLine = linesInBetween[0];
      // Without a closing fence a single leading space is dropped.
      const dropLeadingSpace = !endMatch && onlyLine.startsWith(' ');
      codeText = dropLeadingSpace ? onlyLine.slice(1) : onlyLine;
    } else {
      if (linesInBetween.length > 0) {
        const firstLine = linesInBetween[0];
        if (firstLine.trim().length === 0) {
          // A blank first line is removed entirely
          linesInBetween.shift();
        } else if (firstLine.startsWith(' ')) {
          // otherwise only its single leading space.
          linesInBetween[0] = firstLine.slice(1);
        }
      }
      // Trailing empty lines are dropped.
      while (linesInBetween.length > 0 && linesInBetween[linesInBetween.length - 1].length === 0) {
        linesInBetween.pop();
      }
      codeText = linesInBetween.join('\n');
    }
    lexical.$setState(codeBlockNode, codeFenceState, fence);
    codeBlockNode.append(lexical.$createTextNode(codeText));
    rootNode.append(codeBlockNode);
  },
  type: 'multiline-element'
};
/**
 * Element transformer for bullet lists (`- item`, `* item`, `+ item`).
 * Export handles any list node via `$listExport`, starting at depth 0.
 */
const UNORDERED_LIST = {
  dependencies: [list.ListNode, list.ListItemNode],
  export: (node, exportChildren) => {
    if (!list.$isListNode(node)) {
      return null;
    }
    return $listExport(node, exportChildren, 0);
  },
  regExp: UNORDERED_LIST_REGEX,
  replace: listReplace('bullet'),
  type: 'element'
};
/**
 * Element transformer for check lists (`- [ ] item`, `- [x] item`).
 * Export handles any list node via `$listExport`, starting at depth 0.
 */
const CHECK_LIST = {
  dependencies: [list.ListNode, list.ListItemNode],
  export: (node, exportChildren) => {
    if (!list.$isListNode(node)) {
      return null;
    }
    return $listExport(node, exportChildren, 0);
  },
  regExp: CHECK_LIST_REGEX,
  replace: listReplace('check'),
  type: 'element'
};
/**
 * Element transformer for numbered lists (`1. item`).
 * Export handles any list node via `$listExport`, starting at depth 0.
 */
const ORDERED_LIST = {
  dependencies: [list.ListNode, list.ListItemNode],
  export: (node, exportChildren) => {
    if (!list.$isListNode(node)) {
      return null;
    }
    return $listExport(node, exportChildren, 0);
  },
  regExp: ORDERED_LIST_REGEX,
  replace: listReplace('number'),
  type: 'element'
};
// Text-format transformers: each one pairs a markdown delimiter (`tag`) with
// the text format name(s) (`format`) it stands for.
// NOTE(review): `intraword: false` is set only on the underscore variants;
// presumably it stops the tag from matching in the middle of a word. The
// matching logic lives outside this section, so confirm there.

// `code`
const INLINE_CODE = {
format: ['code'],
tag: '`',
type: 'text-format'
};
// ==highlight==
const HIGHLIGHT = {
format: ['highlight'],
tag: '==',
type: 'text-format'
};
// ***bold italic***
const BOLD_ITALIC_STAR = {
format: ['bold', 'italic'],
tag: '***',
type: 'text-format'
};
// ___bold italic___
const BOLD_ITALIC_UNDERSCORE = {
format: ['bold', 'italic'],
intraword: false,
tag: '___',
type: 'text-format'
};
// **bold**
const BOLD_STAR = {
format: ['bold'],
tag: '**',
type: 'text-format'
};
// __bold__
const BOLD_UNDERSCORE = {
format: ['bold'],
intraword: false,
tag: '__',
type: 'text-format'
};
// ~~strikethrough~~
const STRIKETHROUGH = {
format: ['strikethrough'],
tag: '~~',
type: 'text-format'
};
// *italic*
const ITALIC_STAR = {
format: ['italic'],
tag: '*',
type: 'text-format'
};
// _italic_
const ITALIC_UNDERSCORE = {
format: ['italic'],
intraword: false,
tag: '_',
type: 'text-format'
};
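// Order of text format transformers matters (this applies to the
// TEXT_FORMAT_TRANSFORMERS array, not to the LINK transformer below):
//
// - code should go first as it prevents any transformations inside
// - then longer tags should match first (e.g. ** or __ should go before * or _)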
/**
 * Text-match transformer for inline links: `[text](url)` and
 * `[text](url "title")`.
 *
 * Export skips auto-link nodes. The replace step does nothing when the text
 * node is already inside a link, and bails out when the link text has more
 * closing than opening brackets.
 */
const LINK = {
  dependencies: [link.LinkNode],
  export: (node, exportChildren, exportFormat) => {
    if (!link.$isLinkNode(node) || link.$isAutoLinkNode(node)) {
      return null;
    }
    const title = node.getTitle();
    const label = exportChildren(node);
    const destination = title ? `${node.getURL()} "${title}"` : node.getURL();
    return `[${label}](${destination})`;
  },
  importRegExp: /(?:\[(.+?)\])(?:\((?:([^()\s]+)(?:\s"((?:[^"]*\\")*[^"]*)"\s*)?)\))/,
  regExp: /(?:\[([^[\]]*(?:\[[^[\]]*\][^[\]]*)*)\])(?:\((?:([^()\s]+)(?:\s"((?:[^"]*\\")*[^"]*)"\s*)?)\))$/,
  replace: (textNode, match) => {
    // https://spec.commonmark.org/0.31.2/#inline-link
    if (lexical.$findMatchingParent(textNode, link.$isLinkNode)) {
      return;
    }
    const [, linkText, linkUrl, linkTitle] = match;
    const linkNode = link.$createLinkNode(linkUrl, {
      title: linkTitle
    });
    const countOf = char => linkText.split(char).length - 1;
    const openingBrackets = countOf('[');
    const closingBrackets = countOf(']');
    if (openingBrackets < closingBrackets) {
      return;
    }
    let insideText = linkText;
    let outsideText = '';
    if (openingBrackets > closingBrackets) {
      // An unmatched `[` inside the captured text: everything up to the first
      // `[` stays outside the link, prefixed with the bracket that the regex
      // consumed as the opener.
      const firstBracket = linkText.indexOf('[');
      outsideText = '[' + linkText.slice(0, firstBracket);
      insideText = linkText.slice(firstBracket + 1);
    }
    const linkTextNode = lexical.$createTextNode(insideText);
    linkTextNode.setFormat(textNode.getFormat());
    linkNode.append(linkTextNode);
    textNode.replace(linkNode);
    if (outsideText) {
      linkNode.insertBefore(lexical.$createTextNode(outsideText));
    }
    return linkTextNode;
  },
  trigger: ')',
  type: 'text-match'
};
// Default block-level transformers. CHECK_LIST is not part of this set.
const ELEMENT_TRANSFORMERS = [
  HEADING,
  QUOTE,
  UNORDERED_LIST,
  ORDERED_LIST
];
// Default transformers for blocks that can span several lines.
const MULTILINE_ELEMENT_TRANSFORMERS = [
  CODE
];
// The order of the text format transformers is significant:
//
// - code comes first because nothing inside it may be transformed
// - longer tags come before shorter ones (e.g. ** or __ before * or _)
const TEXT_FORMAT_TRANSFORMERS = [
  INLINE_CODE,
  BOLD_ITALIC_STAR,
  BOLD_ITALIC_UNDERSCORE,
  BOLD_STAR,
  BOLD_UNDERSCORE,
  HIGHLIGHT,
  ITALIC_STAR,
  ITALIC_UNDERSCORE,
  STRIKETHROUGH
];
// Default text-match transformers.
const TEXT_MATCH_TRANSFORMERS = [
  LINK
];
// All default transformers: element, multiline element, text format, text match.
const TRANSFORMERS = [
  ...ELEMENT_TRANSFORMERS,
  ...MULTILINE_ELEMENT_TRANSFORMERS,
  ...TEXT_FORMAT_TRANSFORMERS,
  ...TEXT_MATCH_TRANSFORMERS
];
/**
 * Normalizes markdown text line by line before it is imported.
 *
 * Trailing whitespace is removed from every line. When
 * `shouldMergeAdjacentLines` is true, a non-empty line that directly follows
 * another non-empty line is joined onto it with a single space, unless either
 * line starts or ends a block (heading, quote, list item, table row, tag,
 * code fence). Lines inside fenced code blocks are never merged.
 *
 * Code blocks are tracked by fence length: once a fence of N backticks opens
 * a block, only a line consisting of N or more backticks (optionally
 * indented) closes it. Shorter fences and fences followed by a language are
 * treated as code content, which matches how the CODE transformer in this
 * file locates the closing fence during import.
 *
 * @param {string} input - Raw markdown.
 * @param {boolean} [shouldMergeAdjacentLines=false] - Whether adjacent
 *   non-empty lines are merged into one.
 * @returns {string} The normalized markdown.
 */
function normalizeMarkdown(input, shouldMergeAdjacentLines = false) {
  // Built once per call rather than once per line.
  const endsWithTagEnd = ENDS_WITH(TAG_END_REGEX);
  const endsWithTagStart = ENDS_WITH(TAG_START_REGEX);
  const sanitizedLines = [];
  // Backtick count of the fence that opened the current code block;
  // 0 while outside of a code block.
  let openFenceLength = 0;
  for (const rawLine of input.split('\n')) {
    const line = rawLine.trimEnd();
    if (openFenceLength > 0) {
      // Inside a code block: keep the line and only leave the block on a bare
      // fence that is at least as long as the opening one.
      if (CODE_END_REGEX.test(line) && line.trimStart().length >= openFenceLength) {
        openFenceLength = 0;
      }
      sanitizedLines.push(line);
      continue;
    }
    // Code blocks of ```single line``` don't open a block
    if (CODE_SINGLE_LINE_REGEX.test(line)) {
      sanitizedLines.push(line);
      continue;
    }
    const fenceMatch = CODE_START_REGEX.exec(line);
    if (fenceMatch) {
      const fenceLength = fenceMatch[1].trim().length;
      // A line that also ends with a long enough fence is a complete one-line
      // block (e.g. four-backtick fences) and must not open a block either.
      const afterFence = line.slice(fenceMatch[1].length);
      let contentEnd = afterFence.length;
      while (contentEnd > 0 && afterFence[contentEnd - 1] === '`') {
        contentEnd--;
      }
      const trailingBackticks = afterFence.length - contentEnd;
      if (trailingBackticks < fenceLength) {
        openFenceLength = fenceLength;
      }
      sanitizedLines.push(line);
      continue;
    }
    // In markdown the concept of "empty paragraphs" does not exist.
    // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
    const lastLine = sanitizedLines[sanitizedLines.length - 1];
    const keepOnOwnLine =
      !shouldMergeAdjacentLines ||
      line === '' ||
      !lastLine ||
      HEADING_REGEX.test(lastLine) ||
      HEADING_REGEX.test(line) ||
      QUOTE_REGEX.test(line) ||
      ORDERED_LIST_REGEX.test(line) ||
      UNORDERED_LIST_REGEX.test(line) ||
      CHECK_LIST_REGEX.test(line) ||
      TABLE_ROW_REG_EXP.test(line) ||
      TABLE_ROW_DIVIDER_REG_EXP.test(line) ||
      TAG_START_REGEX.test(line) ||
      TAG_END_REGEX.test(line) ||
      endsWithTagEnd.test(lastLine) ||
      endsWithTagStart.test(lastLine) ||
      CODE_END_REGEX.test(lastLine);
    if (keepOnOwnLine) {
      sanitizedLines.push(line);
    } else {
      sanitizedLines[sanitizedLines.length - 1] = lastLine + ' ' + line.trimStart();
    }
  }
  return sanitizedLines.join('\n');
}
function runElementTransformers(parentNode, anchorNode, anchorOffset, elementTransformers) {
const grandParentNode = parentNode.getParent();
if (!lexical.$isRootOrShadowRoot(grandParentNode) || parentNode.getFirstChild() !== anchorNode) {
return false;
}
const textContent = anchorNode.getTextContent();
// Checking for anchorOffset position to prevent any checks for cases when caret is too far
// from a line start to be a part of block-level markdown trigger.
//
// TODO:
// Can have a quick check if caret is close enough to the beginning of the string (e.g. offset less than 10-20)
// since otherwis