UNPKG

@lexical/markdown

Version:

This package contains Markdown helpers and functionality for Lexical.

github.com/facebook/lexical

facebook/lexical

1,322 lines (1,269 loc) • 65 kB

JavaScript

/** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ 'use strict'; var lexical = require('lexical'); var list = require('@lexical/list'); var richText = require('@lexical/rich-text'); var utils = require('@lexical/utils'); var code = require('@lexical/code'); var link = require('@lexical/link'); /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ function indexBy(list, callback) { const index = {}; for (const item of list) { const key = callback(item); if (!key) { continue; } if (index[key]) { index[key].push(item); } else { index[key] = [item]; } } return index; } function transformersByType(transformers) { const byType = indexBy(transformers, t => t.type); return { element: byType.element || [], multilineElement: byType['multiline-element'] || [], textFormat: byType['text-format'] || [], textMatch: byType['text-match'] || [] }; } const PUNCTUATION_OR_SPACE = /[!-/:-@[-`{-~\s]/; const WHITESPACE = /[ \t\n\r\f]/; const PUNCTUATION = /[!"#$%&'()*+,\-./:;<=>?@[\]^_`{|}~]/; const MARKDOWN_EMPTY_LINE_REG_EXP = /^\s{0,3}$/; function isEmptyParagraph(node) { if (!lexical.$isParagraphNode(node)) { return false; } const firstChild = node.getFirstChild(); return firstChild == null || node.getChildrenSize() === 1 && lexical.$isTextNode(firstChild) && MARKDOWN_EMPTY_LINE_REG_EXP.test(firstChild.getTextContent()); } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ /** * Renders string from markdown. The selection is moved to the start after the operation. */ function createMarkdownExport(transformers, shouldPreserveNewLines = false) { const byType = transformersByType(transformers); const elementTransformers = [...byType.multilineElement, ...byType.element]; const isNewlineDelimited = !shouldPreserveNewLines; // Export only uses text formats that are responsible for single format // e.g. it will filter out *** (bold, italic) and instead use separate ** and * const textFormatTransformers = byType.textFormat.filter(transformer => transformer.format.length === 1) // Make sure all text transformers that contain 'code' in their format are at the end of the array. Otherwise, formatted code like // <strong><code>code</code></strong> will be exported as `**Bold Code**`, as the code format will be applied first, and the bold format // will be applied second and thus skipped entirely, as the code format will prevent any further formatting. .sort((a, b) => { return Number(a.format.includes('code')) - Number(b.format.includes('code')); }); return node => { const output = []; const children = (node || lexical.$getRoot()).getChildren(); for (let i = 0; i < children.length; i++) { const child = children[i]; const result = exportTopLevelElements(child, elementTransformers, textFormatTransformers, byType.textMatch); if (result != null) { output.push( // separate consecutive group of texts with a line break: eg. ["hello", "world"] -> ["hello", "/nworld"] isNewlineDelimited && i > 0 && !isEmptyParagraph(child) && !isEmptyParagraph(children[i - 1]) ? '\n'.concat(result) : result); } } // Ensure consecutive groups of texts are at least \n\n apart while each empty paragraph render as a newline. // Eg. ["hello", "", "", "hi", "\nworld"] -> "hello\n\n\nhi\n\nworld" return output.join('\n'); }; } function exportTopLevelElements(node, elementTransformers, textTransformersIndex, textMatchTransformers) { for (const transformer of elementTransformers) { if (!transformer.export) { continue; } const result = transformer.export(node, _node => exportChildren(_node, textTransformersIndex, textMatchTransformers)); if (result != null) { return result; } } if (lexical.$isElementNode(node)) { return exportChildren(node, textTransformersIndex, textMatchTransformers); } else if (lexical.$isDecoratorNode(node)) { return node.getTextContent(); } else { return null; } } function exportChildren(node, textTransformersIndex, textMatchTransformers, unclosedTags, unclosableTags) { const output = []; const children = node.getChildren(); // keep track of unclosed tags from the very beginning if (!unclosedTags) { unclosedTags = []; } if (!unclosableTags) { unclosableTags = []; } mainLoop: for (const child of children) { for (const transformer of textMatchTransformers) { if (!transformer.export) { continue; } const result = transformer.export(child, parentNode => exportChildren(parentNode, textTransformersIndex, textMatchTransformers, unclosedTags, // Add current unclosed tags to the list of unclosable tags - we don't want nested tags from // textmatch transformers to close the outer ones, as that may result in invalid markdown. // E.g. **text [text**](https://lexical.io) // is invalid markdown, as the closing ** is inside the link. // [...unclosableTags, ...unclosedTags]), (textNode, textContent) => exportTextFormat(textNode, textContent, textTransformersIndex, unclosedTags, unclosableTags)); if (result != null) { output.push(result); continue mainLoop; } } if (lexical.$isLineBreakNode(child)) { output.push('\n'); } else if (lexical.$isTextNode(child)) { output.push(exportTextFormat(child, child.getTextContent(), textTransformersIndex, unclosedTags, unclosableTags)); } else if (lexical.$isElementNode(child)) { // empty paragraph returns "" output.push(exportChildren(child, textTransformersIndex, textMatchTransformers, unclosedTags, unclosableTags)); } else if (lexical.$isDecoratorNode(child)) { output.push(child.getTextContent()); } } return output.join(''); } function exportTextFormat(node, textContent, textTransformers, // unclosed tags include the markdown tags that haven't been closed yet, and their associated formats unclosedTags, unclosableTags) { // This function handles the case of a string looking like this: " foo " // Where it would be invalid markdown to generate: "** foo **" // If the node has no format, we use the original text. // Otherwise, we escape leading and trailing whitespaces to their corresponding code points, // ensuring the returned string maintains its original formatting, e.g., "**   foo   **". let output = node.getFormat() === 0 ? textContent : escapeLeadingAndTrailingWhitespaces(textContent); if (!node.hasFormat('code')) { // Escape any markdown characters in the text content output = output.replace(/([*_`~\\])/g, '\\$1'); } // the opening tags to be added to the result let openingTags = ''; // the closing tags to be added to the result let closingTagsBefore = ''; let closingTagsAfter = ''; const prevNode = getTextSibling(node, true); const nextNode = getTextSibling(node, false); const applied = new Set(); for (const transformer of textTransformers) { const format = transformer.format[0]; const tag = transformer.tag; // dedup applied formats if (hasFormat(node, format) && !applied.has(format)) { // Multiple tags might be used for the same format (*, _) applied.add(format); // append the tag to openingTags, if it's not applied to the previous nodes, // or the nodes before that (which would result in an unclosed tag) if (!hasFormat(prevNode, format) || !unclosedTags.find(element => element.tag === tag)) { unclosedTags.push({ format, tag }); openingTags += tag; } } } // close any tags in the same order they were applied, if necessary for (let i = 0; i < unclosedTags.length; i++) { const nodeHasFormat = hasFormat(node, unclosedTags[i].format); const nextNodeHasFormat = hasFormat(nextNode, unclosedTags[i].format); // prevent adding closing tag if next sibling will do it if (nodeHasFormat && nextNodeHasFormat) { continue; } const unhandledUnclosedTags = [...unclosedTags]; // Shallow copy to avoid modifying the original array while (unhandledUnclosedTags.length > i) { const unclosedTag = unhandledUnclosedTags.pop(); // If tag is unclosable, don't close it and leave it in the original array, // So that it can be closed when it's no longer unclosable if (unclosableTags && unclosedTag && unclosableTags.find(element => element.tag === unclosedTag.tag)) { continue; } if (unclosedTag && typeof unclosedTag.tag === 'string') { if (!nodeHasFormat) { // Handles cases where the tag has not been closed before, e.g. if the previous node // was a text match transformer that did not account for closing tags of the next node (e.g. a link) closingTagsBefore += unclosedTag.tag; } else if (!nextNodeHasFormat) { closingTagsAfter += unclosedTag.tag; } } // Mutate the original array to remove the closed tag unclosedTags.pop(); } break; } output = openingTags + output + closingTagsAfter; // Replace trimmed version of textContent ensuring surrounding whitespace is not modified return closingTagsBefore + output; } function getTextSibling(node, backward) { const sibling = backward ? node.getPreviousSibling() : node.getNextSibling(); if (lexical.$isTextNode(sibling)) { return sibling; } return null; } function hasFormat(node, format) { return lexical.$isTextNode(node) && node.hasFormat(format); } function escapeLeadingAndTrailingWhitespaces(textContent) { return textContent.replace(/^\s+|\s+$/g, match => { return [...match].map(char => '&#' + char.codePointAt(0) + ';').join(''); }); } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ function findOutermostTextFormatTransformer(textNode, textFormatTransformersIndex) { const textContent = textNode.getTextContent(); // Find code span first. Emphasis delimiters inside inline elements (e.g., code spans) // should not be processed. Currently only code spans are handled; other inline elements // (e.g., links, raw HTML) may need similar treatment in the future. const codeRegex = textFormatTransformersIndex.fullMatchRegExpByTag['`']; const codeTransformer = textFormatTransformersIndex.transformersByTag['`']; const excludeRanges = []; let codeMatch = null; if (codeRegex && codeTransformer) { const globalRegex = new RegExp(codeRegex.source, 'g'); const matches = Array.from(textContent.matchAll(globalRegex)); for (const match of matches) { const startIndex = match.index; const endIndex = startIndex + match[0].length; if (!codeMatch) { codeMatch = { content: match[2], endIndex, startIndex, tag: '`' }; } excludeRanges.push({ end: endIndex, start: startIndex }); } } const delimiters = scanDelimiters(textContent, textFormatTransformersIndex, excludeRanges); const emphasisMatch = delimiters.length > 0 ? processEmphasis(textContent, delimiters, textFormatTransformersIndex) : null; let resultMatch = null; let resultTransformer = null; if (codeMatch && emphasisMatch) { if (emphasisMatch.startIndex <= codeMatch.startIndex && emphasisMatch.endIndex >= codeMatch.endIndex) { resultMatch = emphasisMatch; resultTransformer = textFormatTransformersIndex.transformersByTag[emphasisMatch.tag]; } else { resultMatch = codeMatch; resultTransformer = codeTransformer; } } else if (codeMatch) { resultMatch = codeMatch; resultTransformer = codeTransformer; } else if (emphasisMatch) { resultMatch = emphasisMatch; resultTransformer = textFormatTransformersIndex.transformersByTag[emphasisMatch.tag]; } if (!resultMatch || !resultTransformer) { return null; } const regexMatch = [textContent.slice(resultMatch.startIndex, resultMatch.endIndex), resultMatch.tag, resultMatch.content]; regexMatch.index = resultMatch.startIndex; regexMatch.input = textContent; return { endIndex: resultMatch.endIndex, match: regexMatch, startIndex: resultMatch.startIndex, transformer: resultTransformer }; } function scanDelimiters(text, transformersIndex, excludeRanges = []) { const delimiters = []; const delimiterChars = new Set(Object.keys(transformersIndex.transformersByTag).filter(tag => tag[0] !== '`').map(tag => tag[0])); const isEscaped = index => { let count = 0; for (let i = index - 1; i >= 0 && text[i] === '\\'; i--) { count++; } return count % 2 === 1; }; const isInExcludedRange = index => { return excludeRanges.some(range => index >= range.start && index < range.end); }; let i = 0; while (i < text.length) { const char = text[i]; if (!delimiterChars.has(char) || isEscaped(i) || isInExcludedRange(i)) { i++; continue; } let len = 1; while (i + len < text.length && text[i + len] === char) { len++; } const canOpen = canEmphasis(char, text, i, len, true); const canClose = canEmphasis(char, text, i, len, false); if (canOpen || canClose) { delimiters.push({ active: true, canClose, canOpen, char, index: i, length: len, originalLength: len }); } i += len; } return delimiters; } function processEmphasis(text, delimiters, transformersIndex) { const openersBottom = {}; let currentPos = 0; let result = null; while (currentPos < delimiters.length) { const closer = delimiters[currentPos]; if (!closer.active || !closer.canClose || closer.length === 0) { currentPos++; continue; } const bottomKey = `${closer.char}${closer.canOpen}`; const bottom = openersBottom[bottomKey] ?? -1; let foundOpener = false; for (let openIdx = currentPos - 1; openIdx > bottom; openIdx--) { const opener = delimiters[openIdx]; if (!opener.active || !opener.canOpen || opener.length === 0 || opener.char !== closer.char) { continue; } // Rule of 3 if (opener.canClose || closer.canOpen) { const sum = opener.originalLength + closer.originalLength; if (sum % 3 === 0 && opener.originalLength % 3 !== 0 && closer.originalLength % 3 !== 0) { continue; } } const maxLen = Math.min(opener.length, closer.length); const matchedTag = Object.keys(transformersIndex.transformersByTag).filter(t => t[0] === opener.char && t.length <= maxLen).sort((a, b) => b.length - a.length)[0]; if (!matchedTag) { continue; } foundOpener = true; const matchLen = matchedTag.length; const match = { content: text.slice(opener.index + opener.length, closer.index), endIndex: closer.index + matchLen, startIndex: opener.index + (opener.length - matchLen), tag: matchedTag }; if (!result || match.startIndex < result.startIndex || match.startIndex === result.startIndex && match.endIndex > result.endIndex) { result = match; } for (let j = openIdx + 1; j < currentPos; j++) { delimiters[j].active = false; } opener.length -= matchLen; closer.length -= matchLen; opener.active = opener.length > 0; if (closer.length > 0) { closer.index += matchLen; } else { closer.active = false; currentPos++; } break; } if (!foundOpener) { openersBottom[bottomKey] = currentPos - 1; if (!closer.canOpen) { closer.active = false; } currentPos++; } } return result; } function canEmphasis(char, text, index, length, isOpen) { if (!isFlanking(text, index, length, isOpen)) { return false; } if (char === '*') { return true; } if (char === '_') { if (!isFlanking(text, index, length, !isOpen)) { return true; } const adjacentChar = isOpen ? text[index - 1] : text[index + length]; return adjacentChar !== undefined && PUNCTUATION.test(adjacentChar); } return true; } function isFlanking(text, index, length, isLeft) { const charBefore = text[index - 1]; const charAfter = text[index + length]; const [primary, secondary] = isLeft ? [charAfter, charBefore] : [charBefore, charAfter]; if (primary === undefined || WHITESPACE.test(primary)) { return false; } if (!PUNCTUATION.test(primary)) { return true; } return secondary === undefined || WHITESPACE.test(secondary) || PUNCTUATION.test(secondary); } function importTextFormatTransformer(textNode, startIndex, endIndex, transformer, match) { const textContent = textNode.getTextContent(); // No text matches - we can safely process the text format match let transformedNode, nodeAfter, nodeBefore; // If matching full content there's no need to run splitText and can reuse existing textNode // to update its content and apply format. E.g. for **_Hello_** string after applying bold // format (**) it will reuse the same text node to apply italic (_) if (match[0] === textContent) { transformedNode = textNode; } else { if (startIndex === 0) { [transformedNode, nodeAfter] = textNode.splitText(endIndex); } else { [nodeBefore, transformedNode, nodeAfter] = textNode.splitText(startIndex, endIndex); } } transformedNode.setTextContent(match[2]); if (transformer) { for (const format of transformer.format) { if (!transformedNode.hasFormat(format)) { transformedNode.toggleFormat(format); } } } return { nodeAfter: nodeAfter, nodeBefore: nodeBefore, transformedNode: transformedNode }; } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ function findOutermostTextMatchTransformer(textNode_, textMatchTransformers) { const textNode = textNode_; let foundMatchStartIndex = undefined; let foundMatchEndIndex = undefined; let foundMatchTransformer = undefined; let foundMatch = undefined; for (const transformer of textMatchTransformers) { if (!transformer.replace || !transformer.importRegExp) { continue; } const match = textNode.getTextContent().match(transformer.importRegExp); if (!match) { continue; } const startIndex = match.index || 0; const endIndex = transformer.getEndIndex ? transformer.getEndIndex(textNode, match) : startIndex + match[0].length; if (endIndex === false) { continue; } if (foundMatchStartIndex === undefined || foundMatchEndIndex === undefined || // Wraps previous match or is strictly before it. startIndex < foundMatchStartIndex && (endIndex > foundMatchEndIndex || endIndex <= foundMatchStartIndex)) { foundMatchStartIndex = startIndex; foundMatchEndIndex = endIndex; foundMatchTransformer = transformer; foundMatch = match; } } if (foundMatchStartIndex === undefined || foundMatchEndIndex === undefined || foundMatchTransformer === undefined || foundMatch === undefined) { return null; } return { endIndex: foundMatchEndIndex, match: foundMatch, startIndex: foundMatchStartIndex, transformer: foundMatchTransformer }; } function importFoundTextMatchTransformer(textNode, startIndex, endIndex, transformer, match) { let transformedNode, nodeAfter, nodeBefore; if (startIndex === 0) { [transformedNode, nodeAfter] = textNode.splitText(endIndex); } else { [nodeBefore, transformedNode, nodeAfter] = textNode.splitText(startIndex, endIndex); } if (!transformer.replace) { return null; } const potentialTransformedNode = transformer.replace(transformedNode, match); return { nodeAfter, nodeBefore, transformedNode: potentialTransformedNode || undefined }; } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ /** * Returns true if the node can contain transformable markdown. * Code nodes cannot contain transformable markdown. * For example, `code **bold**` should not be transformed to * <code>code <strong>bold</strong></code>. */ function canContainTransformableMarkdown(node) { return lexical.$isTextNode(node) && !node.hasFormat('code'); } /** * Handles applying both text format and text match transformers. * It finds the outermost text format or text match and applies it, * then recursively calls itself to apply the next outermost transformer, * until there are no more transformers to apply. */ function importTextTransformers(textNode, textFormatTransformersIndex, textMatchTransformers) { let foundTextFormat = findOutermostTextFormatTransformer(textNode, textFormatTransformersIndex); let foundTextMatch = findOutermostTextMatchTransformer(textNode, textMatchTransformers); if (foundTextFormat && foundTextMatch) { // Find the outermost transformer if (foundTextFormat.startIndex <= foundTextMatch.startIndex && foundTextFormat.endIndex >= foundTextMatch.endIndex || // foundTextMatch is not contained within foundTextFormat foundTextMatch.startIndex > foundTextFormat.endIndex) { // foundTextFormat wraps foundTextMatch - apply foundTextFormat by setting foundTextMatch to null foundTextMatch = null; } else { // foundTextMatch wraps foundTextFormat - apply foundTextMatch by setting foundTextFormat to null foundTextFormat = null; } } if (foundTextFormat) { const result = importTextFormatTransformer(textNode, foundTextFormat.startIndex, foundTextFormat.endIndex, foundTextFormat.transformer, foundTextFormat.match); if (canContainTransformableMarkdown(result.nodeAfter)) { importTextTransformers(result.nodeAfter, textFormatTransformersIndex, textMatchTransformers); } if (canContainTransformableMarkdown(result.nodeBefore)) { importTextTransformers(result.nodeBefore, textFormatTransformersIndex, textMatchTransformers); } if (canContainTransformableMarkdown(result.transformedNode)) { importTextTransformers(result.transformedNode, textFormatTransformersIndex, textMatchTransformers); } } else if (foundTextMatch) { const result = importFoundTextMatchTransformer(textNode, foundTextMatch.startIndex, foundTextMatch.endIndex, foundTextMatch.transformer, foundTextMatch.match); if (!result) { return; } if (canContainTransformableMarkdown(result.nodeAfter)) { importTextTransformers(result.nodeAfter, textFormatTransformersIndex, textMatchTransformers); } if (canContainTransformableMarkdown(result.nodeBefore)) { importTextTransformers(result.nodeBefore, textFormatTransformersIndex, textMatchTransformers); } if (canContainTransformableMarkdown(result.transformedNode)) { importTextTransformers(result.transformedNode, textFormatTransformersIndex, textMatchTransformers); } } // Handle escape characters const textContent = textNode.getTextContent(); const escapedText = textContent.replace(/\\([*_`~\\])/g, '$1').replace(/&#(\d+);/g, (_, codePoint) => { return String.fromCodePoint(codePoint); }); textNode.setTextContent(escapedText); } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ /** * Renders markdown from a string. The selection is moved to the start after the operation. */ function createMarkdownImport(transformers, shouldPreserveNewLines = false) { const byType = transformersByType(transformers); const textFormatTransformersIndex = createTextFormatTransformersIndex(byType.textFormat); return (markdownString, node) => { const lines = markdownString.split('\n'); const linesLength = lines.length; const root = node || lexical.$getRoot(); root.clear(); for (let i = 0; i < linesLength; i++) { const lineText = lines[i]; const [imported, shiftedIndex] = $importMultiline(lines, i, byType.multilineElement, root); if (imported) { // If a multiline markdown element was imported, we don't want to process the lines that were part of it anymore. // There could be other sub-markdown elements (both multiline and normal ones) matching within this matched multiline element's children. // However, it would be the responsibility of the matched multiline transformer to decide how it wants to handle them. // We cannot handle those, as there is no way for us to know how to maintain the correct order of generated lexical nodes for possible children. i = shiftedIndex; // Next loop will start from the line after the last line of the multiline element continue; } $importBlocks(lineText, root, byType.element, textFormatTransformersIndex, byType.textMatch, shouldPreserveNewLines); } // By default, removing empty paragraphs as md does not really // allow empty lines and uses them as delimiter. // If you need empty lines set shouldPreserveNewLines = true. const children = root.getChildren(); for (const child of children) { if (!shouldPreserveNewLines && isEmptyParagraph(child) && root.getChildrenSize() > 1) { child.remove(); } } if (lexical.$getSelection() !== null) { root.selectStart(); } }; } /** * * @returns first element of the returned tuple is a boolean indicating if a multiline element was imported. The second element is the index of the last line that was processed. */ function $importMultiline(lines, startLineIndex, multilineElementTransformers, rootNode) { for (const transformer of multilineElementTransformers) { const { handleImportAfterStartMatch, regExpEnd, regExpStart, replace } = transformer; const startMatch = lines[startLineIndex].match(regExpStart); if (!startMatch) { continue; // Try next transformer } if (handleImportAfterStartMatch) { const result = handleImportAfterStartMatch({ lines, rootNode, startLineIndex, startMatch, transformer }); if (result === null) { continue; } else if (result) { return result; } } const regexpEndRegex = typeof regExpEnd === 'object' && 'regExp' in regExpEnd ? regExpEnd.regExp : regExpEnd; const isEndOptional = regExpEnd && typeof regExpEnd === 'object' && 'optional' in regExpEnd ? regExpEnd.optional : !regExpEnd; let endLineIndex = startLineIndex; const linesLength = lines.length; // check every single line for the closing match. It could also be on the same line as the opening match. while (endLineIndex < linesLength) { const endMatch = regexpEndRegex ? lines[endLineIndex].match(regexpEndRegex) : null; if (!endMatch) { if (!isEndOptional || isEndOptional && endLineIndex < linesLength - 1 // Optional end, but didn't reach the end of the document yet => continue searching for potential closing match ) { endLineIndex++; continue; // Search next line for closing match } } // Now, check if the closing match matched is the same as the opening match. // If it is, we need to continue searching for the actual closing match. if (endMatch && startLineIndex === endLineIndex && endMatch.index === startMatch.index) { endLineIndex++; continue; // Search next line for closing match } // At this point, we have found the closing match. Next: calculate the lines in between open and closing match // This should not include the matches themselves, and be split up by lines const linesInBetween = []; if (endMatch && startLineIndex === endLineIndex) { linesInBetween.push(lines[startLineIndex].slice(startMatch[0].length, -endMatch[0].length)); } else { for (let i = startLineIndex; i <= endLineIndex; i++) { if (i === startLineIndex) { const text = lines[i].slice(startMatch[0].length); linesInBetween.push(text); // Also include empty text } else if (i === endLineIndex && endMatch) { const text = lines[i].slice(0, -endMatch[0].length); linesInBetween.push(text); // Also include empty text } else { linesInBetween.push(lines[i]); } } } if (replace(rootNode, null, startMatch, endMatch, linesInBetween, true) !== false) { // Return here. This $importMultiline function is run line by line and should only process a single multiline element at a time. return [true, endLineIndex]; } // The replace function returned false, despite finding the matching open and close tags => this transformer does not want to handle it. // Thus, we continue letting the remaining transformers handle the passed lines of text from the beginning break; } } // No multiline transformer handled this line successfully return [false, startLineIndex]; } function $importBlocks(lineText, rootNode, elementTransformers, textFormatTransformersIndex, textMatchTransformers, shouldPreserveNewLines) { const textNode = lexical.$createTextNode(lineText); const elementNode = lexical.$createParagraphNode(); elementNode.append(textNode); rootNode.append(elementNode); for (const { regExp, replace } of elementTransformers) { const match = lineText.match(regExp); if (match) { textNode.setTextContent(lineText.slice(match[0].length)); if (replace(elementNode, [textNode], match, true) !== false) { break; } } } importTextTransformers(textNode, textFormatTransformersIndex, textMatchTransformers); // If no transformer found and we left with original paragraph node // can check if its content can be appended to the previous node // if it's a paragraph, quote or list if (elementNode.isAttached() && lineText.length > 0) { const previousNode = elementNode.getPreviousSibling(); if (!shouldPreserveNewLines && ( // Only append if we're not preserving newlines lexical.$isParagraphNode(previousNode) || richText.$isQuoteNode(previousNode) || list.$isListNode(previousNode))) { let targetNode = previousNode; if (list.$isListNode(previousNode)) { const lastDescendant = previousNode.getLastDescendant(); if (lastDescendant == null) { targetNode = null; } else { targetNode = utils.$findMatchingParent(lastDescendant, list.$isListItemNode); } } if (targetNode != null && targetNode.getTextContentSize() > 0) { targetNode.splice(targetNode.getChildrenSize(), 0, [lexical.$createLineBreakNode(), ...elementNode.getChildren()]); elementNode.remove(); } } } } function createTextFormatTransformersIndex(textTransformers) { const transformersByTag = {}; const fullMatchRegExpByTag = {}; const openTagsRegExp = []; const escapeRegExp = `(?<![\\\\])`; for (const transformer of textTransformers) { const { tag } = transformer; transformersByTag[tag] = transformer; const tagRegExp = tag.replace(/(\*|\^|\+)/g, '\\$1'); openTagsRegExp.push(tagRegExp); // Single-char tag (e.g. "*"), if (tag.length === 1) { if (tag === '`') { // Special handling for backticks - match content with escaped backticks fullMatchRegExpByTag[tag] = new RegExp(`(?<![\\\\\`])(\`)((?:\\\\\`|[^\`])+?)(\`)(?!\`)`); } else { fullMatchRegExpByTag[tag] = new RegExp(`(?<![\\\\${tagRegExp}])(${tagRegExp})((\\\\${tagRegExp})?.*?[^${tagRegExp}\\s](\\\\${tagRegExp})?)((?<!\\\\)|(?<=\\\\\\\\))(${tagRegExp})(?![\\\\${tagRegExp}])`); } } else { // Multi‐char tags (e.g. "**") fullMatchRegExpByTag[tag] = new RegExp(`(?<!\\\\)(${tagRegExp})((\\\\${tagRegExp})?.*?[^\\s](\\\\${tagRegExp})?)((?<!\\\\)|(?<=\\\\\\\\))(${tagRegExp})(?!\\\\)`); } } return { // Reg exp to find open tag + content + close tag fullMatchRegExpByTag, // Regexp to locate *any* potential opening tag (longest first). openTagsRegExp: new RegExp(`${escapeRegExp}(${openTagsRegExp.join('|')})`, 'g'), transformersByTag }; } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ // Do not require this module directly! Use normal `invariant` calls. function formatDevErrorMessage(message) { throw new Error(message); } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/; const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/; const CHECK_LIST_REGEX = /^(\s*)(?:[-*+]\s)?\s?(\[(\s|x)?\])\s/i; const HEADING_REGEX = /^(#{1,6})\s/; const QUOTE_REGEX = /^>\s/; const CODE_START_REGEX = /^([ \t]*`{3,})([\w-]+)?[ \t]?/; const CODE_END_REGEX = /^[ \t]*`{3,}$/; const CODE_SINGLE_LINE_REGEX = /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/; const TABLE_ROW_REG_EXP = /^(?:\|)(.+)(?:\|)\s?$/; const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/; const TAG_START_REGEX = /^<[a-z_][\w-]*(?:\s[^<>]*)?\/?>/i; const TAG_END_REGEX = /^<\/[a-z_][\w-]*\s*>/i; const ENDS_WITH = regex => new RegExp(`(?:${regex.source})$`, regex.flags); const listMarkerState = lexical.createState('mdListMarker', { parse: v => typeof v === 'string' && /^[-*+]$/.test(v) ? v : '-' }); const codeFenceState = lexical.createState('mdCodeFence', { parse: val => { if (typeof val === 'string' && /^`{3,}$/.test(val)) { return val; } return '```'; } }); const createBlockNode = createNode => { return (parentNode, children, match, isImport) => { const node = createNode(match); node.append(...children); parentNode.replace(node); if (!isImport) { node.select(0, 0); } }; }; // Amount of spaces that define indentation level // TODO: should be an option const LIST_INDENT_SIZE = 4; function getIndent(whitespaces) { const tabs = whitespaces.match(/\t/g); const spaces = whitespaces.match(/ /g); let indent = 0; if (tabs) { indent += tabs.length; } if (spaces) { indent += Math.floor(spaces.length / LIST_INDENT_SIZE); } return indent; } const listReplace = listType => { return (parentNode, children, match, isImport) => { const previousNode = parentNode.getPreviousSibling(); const nextNode = parentNode.getNextSibling(); const listItem = list.$createListItemNode(listType === 'check' ? match[3] === 'x' : undefined); const firstMatchChar = match[0].trim()[0]; const listMarker = (listType === 'bullet' || listType === 'check') && firstMatchChar === listMarkerState.parse(firstMatchChar) ? firstMatchChar : undefined; if (list.$isListNode(nextNode) && nextNode.getListType() === listType) { if (listMarker) { lexical.$setState(nextNode, listMarkerState, listMarker); } const firstChild = nextNode.getFirstChild(); if (firstChild !== null) { firstChild.insertBefore(listItem); } else { // should never happen, but let's handle gracefully, just in case. nextNode.append(listItem); } parentNode.remove(); } else if (list.$isListNode(previousNode) && previousNode.getListType() === listType) { if (listMarker) { lexical.$setState(previousNode, listMarkerState, listMarker); } previousNode.append(listItem); parentNode.remove(); } else { const list$1 = list.$createListNode(listType, listType === 'number' ? Number(match[2]) : undefined); if (listMarker) { lexical.$setState(list$1, listMarkerState, listMarker); } list$1.append(listItem); parentNode.replace(list$1); } listItem.append(...children); if (!isImport) { listItem.select(0, 0); } const indent = getIndent(match[1]); if (indent) { listItem.setIndent(indent); } }; }; const $listExport = (listNode, exportChildren, depth) => { const output = []; const children = listNode.getChildren(); let index = 0; for (const listItemNode of children) { if (list.$isListItemNode(listItemNode)) { if (listItemNode.getChildrenSize() === 1) { const firstChild = listItemNode.getFirstChild(); if (list.$isListNode(firstChild)) { output.push($listExport(firstChild, exportChildren, depth + 1)); continue; } } const indent = ' '.repeat(depth * LIST_INDENT_SIZE); const listType = listNode.getListType(); const listMarker = lexical.$getState(listNode, listMarkerState); const prefix = listType === 'number' ? `${listNode.getStart() + index}. ` : listType === 'check' ? `${listMarker} [${listItemNode.getChecked() ? 'x' : ' '}] ` : listMarker + ' '; output.push(indent + prefix + exportChildren(listItemNode)); index++; } } return output.join('\n'); }; const HEADING = { dependencies: [richText.HeadingNode], export: (node, exportChildren) => { if (!richText.$isHeadingNode(node)) { return null; } const level = Number(node.getTag().slice(1)); return '#'.repeat(level) + ' ' + exportChildren(node); }, regExp: HEADING_REGEX, replace: createBlockNode(match => { const tag = 'h' + match[1].length; return richText.$createHeadingNode(tag); }), type: 'element' }; const QUOTE = { dependencies: [richText.QuoteNode], export: (node, exportChildren) => { if (!richText.$isQuoteNode(node)) { return null; } const lines = exportChildren(node).split('\n'); const output = []; for (const line of lines) { output.push('> ' + line); } return output.join('\n'); }, regExp: QUOTE_REGEX, replace: (parentNode, children, _match, isImport) => { if (isImport) { const previousNode = parentNode.getPreviousSibling(); if (richText.$isQuoteNode(previousNode)) { previousNode.splice(previousNode.getChildrenSize(), 0, [lexical.$createLineBreakNode(), ...children]); parentNode.remove(); return; } } const node = richText.$createQuoteNode(); node.append(...children); parentNode.replace(node); if (!isImport) { node.select(0, 0); } }, type: 'element' }; const CODE = { dependencies: [code.CodeNode], export: node => { if (!code.$isCodeNode(node)) { return null; } const textContent = node.getTextContent(); let fence = lexical.$getState(node, codeFenceState); if (textContent.indexOf(fence) > -1) { const backticks = textContent.match(/`{3,}/g); if (backticks) { const maxLength = Math.max(...backticks.map(b => b.length)); fence = '`'.repeat(maxLength + 1); } } return fence + (node.getLanguage() || '') + (textContent ? '\n' + textContent : '') + '\n' + fence; }, handleImportAfterStartMatch: ({ lines, rootNode, startLineIndex, startMatch }) => { const fence = startMatch[1]; const fenceLength = fence.trim().length; const currentLine = lines[startLineIndex]; const afterFenceIndex = startMatch.index + fence.length; const afterFence = currentLine.slice(afterFenceIndex); const singleLineEndRegex = new RegExp(`\`{${fenceLength},}$`); if (singleLineEndRegex.test(afterFence)) { const endMatch = afterFence.match(singleLineEndRegex); const content = afterFence.slice(0, afterFence.lastIndexOf(endMatch[0])); const fakeStartMatch = [...startMatch]; fakeStartMatch[2] = ''; CODE.replace(rootNode, null, fakeStartMatch, endMatch, [content], true); return [true, startLineIndex]; } const multilineEndRegex = new RegExp(`^[ \\t]*\`{${fenceLength},}$`); for (let i = startLineIndex + 1; i < lines.length; i++) { const line = lines[i]; if (multilineEndRegex.test(line)) { const endMatch = line.match(multilineEndRegex); const linesInBetween = lines.slice(startLineIndex + 1, i); const afterFullMatch = currentLine.slice(startMatch[0].length); if (afterFullMatch.length > 0) { linesInBetween.unshift(afterFullMatch); } CODE.replace(rootNode, null, startMatch, endMatch, linesInBetween, true); return [true, i]; } } const linesInBetween = lines.slice(startLineIndex + 1); const afterFullMatch = currentLine.slice(startMatch[0].length); if (afterFullMatch.length > 0) { linesInBetween.unshift(afterFullMatch); } CODE.replace(rootNode, null, startMatch, null, linesInBetween, true); return [true, lines.length - 1]; }, regExpEnd: { optional: true, regExp: CODE_END_REGEX }, regExpStart: CODE_START_REGEX, replace: (rootNode, children, startMatch, endMatch, linesInBetween, isImport) => { let codeBlockNode; let code$1; const fence = startMatch[1] ? startMatch[1].trim() : '```'; const language = startMatch[2] || undefined; if (!children && linesInBetween) { if (linesInBetween.length === 1) { if (endMatch) { codeBlockNode = code.$createCodeNode(language); code$1 = linesInBetween[0]; } else { codeBlockNode = code.$createCodeNode(language); code$1 = linesInBetween[0].startsWith(' ') ? linesInBetween[0].slice(1) : linesInBetween[0]; } } else { codeBlockNode = code.$createCodeNode(language); if (linesInBetween.length > 0) { if (linesInBetween[0].trim().length === 0) { linesInBetween.shift(); } else if (linesInBetween[0].startsWith(' ')) { linesInBetween[0] = linesInBetween[0].slice(1); } } while (linesInBetween.length > 0 && !linesInBetween[linesInBetween.length - 1].length) { linesInBetween.pop(); } code$1 = linesInBetween.join('\n'); } lexical.$setState(codeBlockNode, codeFenceState, fence); const textNode = lexical.$createTextNode(code$1); codeBlockNode.append(textNode); rootNode.append(codeBlockNode); } else if (children) { createBlockNode(match => { return code.$createCodeNode(match ? match[2] : undefined); })(rootNode, children, startMatch, isImport); } }, type: 'multiline-element' }; const UNORDERED_LIST = { dependencies: [list.ListNode, list.ListItemNode], export: (node, exportChildren) => { return list.$isListNode(node) ? $listExport(node, exportChildren, 0) : null; }, regExp: UNORDERED_LIST_REGEX, replace: listReplace('bullet'), type: 'element' }; const CHECK_LIST = { dependencies: [list.ListNode, list.ListItemNode], export: (node, exportChildren) => { return list.$isListNode(node) ? $listExport(node, exportChildren, 0) : null; }, regExp: CHECK_LIST_REGEX, replace: listReplace('check'), type: 'element' }; const ORDERED_LIST = { dependencies: [list.ListNode, list.ListItemNode], export: (node, exportChildren) => { return list.$isListNode(node) ? $listExport(node, exportChildren, 0) : null; }, regExp: ORDERED_LIST_REGEX, replace: listReplace('number'), type: 'element' }; const INLINE_CODE = { format: ['code'], tag: '`', type: 'text-format' }; const HIGHLIGHT = { format: ['highlight'], tag: '==', type: 'text-format' }; const BOLD_ITALIC_STAR = { format: ['bold', 'italic'], tag: '***', type: 'text-format' }; const BOLD_ITALIC_UNDERSCORE = { format: ['bold', 'italic'], intraword: false, tag: '___', type: 'text-format' }; const BOLD_STAR = { format: ['bold'], tag: '**', type: 'text-format' }; const BOLD_UNDERSCORE = { format: ['bold'], intraword: false, tag: '__', type: 'text-format' }; const STRIKETHROUGH = { format: ['strikethrough'], tag: '~~', type: 'text-format' }; const ITALIC_STAR = { format: ['italic'], tag: '*', type: 'text-format' }; const ITALIC_UNDERSCORE = { format: ['italic'], intraword: false, tag: '_', type: 'text-format' }; // Order of text transformers matters: // // - code should go first as it prevents any transformations inside // - then longer tags match (e.g. ** or __ should go before * or _) const LINK = { dependencies: [link.LinkNode], export: (node, exportChildren, exportFormat) => { if (!link.$isLinkNode(node) || link.$isAutoLinkNode(node)) { return null; } const title = node.getTitle(); const textContent = exportChildren(node); const linkContent = title ? `[${textContent}](${node.getURL()} "${title}")` : `[${textContent}](${node.getURL()})`; return linkContent; }, importRegExp: /(?:\[(.+?)\])(?:\((?:([^()\s]+)(?:\s"((?:[^"]*\\")*[^"]*)"\s*)?)\))/, regExp: /(?:\[([^[\]]*(?:\[[^[\]]*\][^[\]]*)*)\])(?:\((?:([^()\s]+)(?:\s"((?:[^"]*\\")*[^"]*)"\s*)?)\))$/, replace: (textNode, match) => { // https://spec.commonmark.org/0.31.2/#inline-link if (lexical.$findMatchingParent(textNode, link.$isLinkNode)) { return; } const [, linkText, linkUrl, linkTitle] = match; const linkNode = link.$createLinkNode(linkUrl, { title: linkTitle }); const openBracketAmount = linkText.split('[').length - 1; const closeBracketAmount = linkText.split(']').length - 1; let parsedLinkText = linkText; let outsideLinkText = ''; if (openBracketAmount < closeBracketAmount) { return; } else if (openBracketAmount > closeBracketAmount) { const linkTextParts = linkText.split('['); outsideLinkText = '[' + linkTextParts[0]; parsedLinkText = linkTextParts.slice(1).join('['); } const linkTextNode = lexical.$createTextNode(parsedLinkText); linkTextNode.setFormat(textNode.getFormat()); linkNode.append(linkTextNode); textNode.replace(linkNode); if (outsideLinkText) { linkNode.insertBefore(lexical.$createTextNode(outsideLinkText)); } return linkTextNode; }, trigger: ')', type: 'text-match' }; const ELEMENT_TRANSFORMERS = [HEADING, QUOTE, UNORDERED_LIST, ORDERED_LIST]; const MULTILINE_ELEMENT_TRANSFORMERS = [CODE]; // Order of text format transformers matters: // // - code should go first as it prevents any transformations inside // - then longer tags match (e.g. ** or __ should go before * or _) const TEXT_FORMAT_TRANSFORMERS = [INLINE_CODE, BOLD_ITALIC_STAR, BOLD_ITALIC_UNDERSCORE, BOLD_STAR, BOLD_UNDERSCORE, HIGHLIGHT, ITALIC_STAR, ITALIC_UNDERSCORE, STRIKETHROUGH]; const TEXT_MATCH_TRANSFORMERS = [LINK]; const TRANSFORMERS = [...ELEMENT_TRANSFORMERS, ...MULTILINE_ELEMENT_TRANSFORMERS, ...TEXT_FORMAT_TRANSFORMERS, ...TEXT_MATCH_TRANSFORMERS]; function normalizeMarkdown(input, shouldMergeAdjacentLines = false) { const lines = input.split('\n'); let inCodeBlock = false; const sanitizedLines = []; for (let i = 0; i < lines.length; i++) { const line = lines[i].trimEnd(); const lastLine = sanitizedLines[sanitizedLines.length - 1]; // Code blocks of ```single line``` don't toggle the inCodeBlock flag if (CODE_SINGLE_LINE_REGEX.test(line)) { sanitizedLines.push(line); continue; } // Detect the start or end of a code block if (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line)) { inCodeBlock = !inCodeBlock; sanitizedLines.push(line); continue; } // If we are inside a code block, keep the line unchanged if (inCodeBlock) { sanitizedLines.push(line); continue; } // In markdown the concept of "empty paragraphs" does not exist. // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged. if (line === '' || lastLine === '' || !lastLine || HEADING_REGEX.test(lastLine) || HEADING_REGEX.test(line) || QUOTE_REGEX.test(line) || ORDERED_LIST_REGEX.test(line) || UNORDERED_LIST_REGEX.test(line) || CHECK_LIST_REGEX.test(line) || TABLE_ROW_REG_EXP.test(line) || TABLE_ROW_DIVIDER_REG_EXP.test(line) || !shouldMergeAdjacentLines || TAG_START_REGEX.test(line) || TAG_END_REGEX.test(line) || ENDS_WITH(TAG_END_REGEX).test(lastLine) || ENDS_WITH(TAG_START_REGEX).test(lastLine) || CODE_END_REGEX.test(lastLine)) { sanitizedLines.push(line); } else { sanitizedLines[sanitizedLines.length - 1] = lastLine + ' ' + line.trimStart(); } } return sanitizedLines.join('\n'); } function runElementTransformers(parentNode, anchorNode, anchorOffset, elementTransformers) { const grandParentNode = parentNode.getParent(); if (!lexical.$isRootOrShadowRoot(grandParentNode) || parentNode.getFirstChild() !== anchorNode) { return false; } const textContent = anchorNode.getTextContent(); // Checking for anchorOffset position to prevent any checks for cases when caret is too far // from a line start to be a part of block-level markdown trigger. // // TODO: // Can have a quick check if caret is close enough to the beginning of the string (e.g. offset less than 10-20) // since otherwis