UNPKG

@lexical/markdown

Version:

This package contains Markdown helpers and functionality for Lexical.

1,305 lines (1,241 loc) 57.1 kB
/** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ import { $isParagraphNode, $isTextNode, $getRoot, $isElementNode, $isDecoratorNode, $isLineBreakNode, $getSelection, $createTextNode, $createParagraphNode, $createLineBreakNode, createState, $setState, $getState, COLLABORATION_TAG, HISTORIC_TAG, $isRangeSelection, $isRootOrShadowRoot, $createRangeSelection, $setSelection } from 'lexical'; import { $isListNode, $isListItemNode, ListNode, ListItemNode, $createListItemNode, $createListNode } from '@lexical/list'; import { $isQuoteNode, HeadingNode, QuoteNode, $createHeadingNode, $isHeadingNode, $createQuoteNode } from '@lexical/rich-text'; import { $findMatchingParent } from '@lexical/utils'; import { CodeNode, $createCodeNode, $isCodeNode } from '@lexical/code'; import { LinkNode, $createLinkNode, $isLinkNode, $isAutoLinkNode } from '@lexical/link'; /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
* */ function indexBy(list, callback) { const index = {}; for (const item of list) { const key = callback(item); if (!key) { continue; } if (index[key]) { index[key].push(item); } else { index[key] = [item]; } } return index; } function transformersByType(transformers) { const byType = indexBy(transformers, t => t.type); return { element: byType.element || [], multilineElement: byType['multiline-element'] || [], textFormat: byType['text-format'] || [], textMatch: byType['text-match'] || [] }; } const PUNCTUATION_OR_SPACE = /[!-/:-@[-`{-~\s]/; const MARKDOWN_EMPTY_LINE_REG_EXP = /^\s{0,3}$/; function isEmptyParagraph(node) { if (!$isParagraphNode(node)) { return false; } const firstChild = node.getFirstChild(); return firstChild == null || node.getChildrenSize() === 1 && $isTextNode(firstChild) && MARKDOWN_EMPTY_LINE_REG_EXP.test(firstChild.getTextContent()); } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ /** * Renders string from markdown. The selection is moved to the start after the operation. */ function createMarkdownExport(transformers, shouldPreserveNewLines = false) { const byType = transformersByType(transformers); const elementTransformers = [...byType.multilineElement, ...byType.element]; const isNewlineDelimited = !shouldPreserveNewLines; // Export only uses text formats that are responsible for single format // e.g. it will filter out *** (bold, italic) and instead use separate ** and * const textFormatTransformers = byType.textFormat.filter(transformer => transformer.format.length === 1) // Make sure all text transformers that contain 'code' in their format are at the end of the array. 
Otherwise, formatted code like // <strong><code>code</code></strong> will be exported as `**Bold Code**`, as the code format will be applied first, and the bold format // will be applied second and thus skipped entirely, as the code format will prevent any further formatting. .sort((a, b) => { return Number(a.format.includes('code')) - Number(b.format.includes('code')); }); return node => { const output = []; const children = (node || $getRoot()).getChildren(); for (let i = 0; i < children.length; i++) { const child = children[i]; const result = exportTopLevelElements(child, elementTransformers, textFormatTransformers, byType.textMatch); if (result != null) { output.push( // separate consecutive group of texts with a line break: eg. ["hello", "world"] -> ["hello", "/nworld"] isNewlineDelimited && i > 0 && !isEmptyParagraph(child) && !isEmptyParagraph(children[i - 1]) ? '\n'.concat(result) : result); } } // Ensure consecutive groups of texts are at least \n\n apart while each empty paragraph render as a newline. // Eg. 
["hello", "", "", "hi", "\nworld"] -> "hello\n\n\nhi\n\nworld" return output.join('\n'); }; } function exportTopLevelElements(node, elementTransformers, textTransformersIndex, textMatchTransformers) { for (const transformer of elementTransformers) { if (!transformer.export) { continue; } const result = transformer.export(node, _node => exportChildren(_node, textTransformersIndex, textMatchTransformers)); if (result != null) { return result; } } if ($isElementNode(node)) { return exportChildren(node, textTransformersIndex, textMatchTransformers); } else if ($isDecoratorNode(node)) { return node.getTextContent(); } else { return null; } } function exportChildren(node, textTransformersIndex, textMatchTransformers, unclosedTags, unclosableTags) { const output = []; const children = node.getChildren(); // keep track of unclosed tags from the very beginning if (!unclosedTags) { unclosedTags = []; } if (!unclosableTags) { unclosableTags = []; } mainLoop: for (const child of children) { for (const transformer of textMatchTransformers) { if (!transformer.export) { continue; } const result = transformer.export(child, parentNode => exportChildren(parentNode, textTransformersIndex, textMatchTransformers, unclosedTags, // Add current unclosed tags to the list of unclosable tags - we don't want nested tags from // textmatch transformers to close the outer ones, as that may result in invalid markdown. // E.g. **text [text**](https://lexical.io) // is invalid markdown, as the closing ** is inside the link. 
// [...unclosableTags, ...unclosedTags]), (textNode, textContent) => exportTextFormat(textNode, textContent, textTransformersIndex, unclosedTags, unclosableTags)); if (result != null) { output.push(result); continue mainLoop; } } if ($isLineBreakNode(child)) { output.push('\n'); } else if ($isTextNode(child)) { output.push(exportTextFormat(child, child.getTextContent(), textTransformersIndex, unclosedTags, unclosableTags)); } else if ($isElementNode(child)) { // empty paragraph returns "" output.push(exportChildren(child, textTransformersIndex, textMatchTransformers, unclosedTags, unclosableTags)); } else if ($isDecoratorNode(child)) { output.push(child.getTextContent()); } } return output.join(''); } function exportTextFormat(node, textContent, textTransformers, // unclosed tags include the markdown tags that haven't been closed yet, and their associated formats unclosedTags, unclosableTags) { // This function handles the case of a string looking like this: " foo " // Where it would be invalid markdown to generate: "** foo **" // If the node has no format, we use the original text. // Otherwise, we escape leading and trailing whitespaces to their corresponding code points, // ensuring the returned string maintains its original formatting, e.g., "**&#32;&#32;&#32;foo&#32;&#32;&#32;**". let output = node.getFormat() === 0 ? 
textContent : escapeLeadingAndTrailingWhitespaces(textContent); if (!node.hasFormat('code')) { // Escape any markdown characters in the text content output = output.replace(/([*_`~\\])/g, '\\$1'); } // the opening tags to be added to the result let openingTags = ''; // the closing tags to be added to the result let closingTagsBefore = ''; let closingTagsAfter = ''; const prevNode = getTextSibling(node, true); const nextNode = getTextSibling(node, false); const applied = new Set(); for (const transformer of textTransformers) { const format = transformer.format[0]; const tag = transformer.tag; // dedup applied formats if (hasFormat(node, format) && !applied.has(format)) { // Multiple tags might be used for the same format (*, _) applied.add(format); // append the tag to openingTags, if it's not applied to the previous nodes, // or the nodes before that (which would result in an unclosed tag) if (!hasFormat(prevNode, format) || !unclosedTags.find(element => element.tag === tag)) { unclosedTags.push({ format, tag }); openingTags += tag; } } } // close any tags in the same order they were applied, if necessary for (let i = 0; i < unclosedTags.length; i++) { const nodeHasFormat = hasFormat(node, unclosedTags[i].format); const nextNodeHasFormat = hasFormat(nextNode, unclosedTags[i].format); // prevent adding closing tag if next sibling will do it if (nodeHasFormat && nextNodeHasFormat) { continue; } const unhandledUnclosedTags = [...unclosedTags]; // Shallow copy to avoid modifying the original array while (unhandledUnclosedTags.length > i) { const unclosedTag = unhandledUnclosedTags.pop(); // If tag is unclosable, don't close it and leave it in the original array, // So that it can be closed when it's no longer unclosable if (unclosableTags && unclosedTag && unclosableTags.find(element => element.tag === unclosedTag.tag)) { continue; } if (unclosedTag && typeof unclosedTag.tag === 'string') { if (!nodeHasFormat) { // Handles cases where the tag has not been closed 
before, e.g. if the previous node // was a text match transformer that did not account for closing tags of the next node (e.g. a link) closingTagsBefore += unclosedTag.tag; } else if (!nextNodeHasFormat) { closingTagsAfter += unclosedTag.tag; } } // Mutate the original array to remove the closed tag unclosedTags.pop(); } break; } output = openingTags + output + closingTagsAfter; // Replace trimmed version of textContent ensuring surrounding whitespace is not modified return closingTagsBefore + output; } // Get next or previous text sibling a text node, including cases // when it's a child of inline element (e.g. link) function getTextSibling(node, backward) { let sibling = backward ? node.getPreviousSibling() : node.getNextSibling(); if (!sibling) { const parent = node.getParentOrThrow(); if (parent.isInline()) { sibling = backward ? parent.getPreviousSibling() : parent.getNextSibling(); } } while (sibling) { if ($isElementNode(sibling)) { if (!sibling.isInline()) { break; } const descendant = backward ? sibling.getLastDescendant() : sibling.getFirstDescendant(); if ($isTextNode(descendant)) { return descendant; } else { sibling = backward ? sibling.getPreviousSibling() : sibling.getNextSibling(); } } if ($isTextNode(sibling)) { return sibling; } if (!$isElementNode(sibling)) { return null; } } return null; } function hasFormat(node, format) { return $isTextNode(node) && node.hasFormat(format); } function escapeLeadingAndTrailingWhitespaces(textContent) { return textContent.replace(/^\s+|\s+$/g, match => { return [...match].map(char => '&#' + char.codePointAt(0) + ';').join(''); }); } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. 
* */ function findOutermostTextFormatTransformer(textNode, textFormatTransformersIndex) { const textContent = textNode.getTextContent(); const match = findOutermostMatch(textContent, textFormatTransformersIndex); if (!match) { return null; } const textFormatMatchStart = match.index || 0; const textFormatMatchEnd = textFormatMatchStart + match[0].length; const transformer = textFormatTransformersIndex.transformersByTag[match[1]]; return { endIndex: textFormatMatchEnd, match, startIndex: textFormatMatchStart, transformer }; } // Finds first "<tag>content<tag>" match that is not nested into another tag function findOutermostMatch(textContent, textTransformersIndex) { const openTagsMatch = textContent.match(textTransformersIndex.openTagsRegExp); if (openTagsMatch == null) { return null; } for (const match of openTagsMatch) { // Open tags reg exp might capture leading space so removing it // before using match to find transformer const tag = match.replace(/^\s/, ''); const fullMatchRegExp = textTransformersIndex.fullMatchRegExpByTag[tag]; if (fullMatchRegExp == null) { continue; } const fullMatch = textContent.match(fullMatchRegExp); const transformer = textTransformersIndex.transformersByTag[tag]; if (fullMatch != null && transformer != null) { if (transformer.intraword !== false) { return fullMatch; } // For non-intraword transformers checking if it's within a word // or surrounded with space/punctuation/newline const { index = 0 } = fullMatch; const beforeChar = textContent[index - 1]; const afterChar = textContent[index + fullMatch[0].length]; if ((!beforeChar || PUNCTUATION_OR_SPACE.test(beforeChar)) && (!afterChar || PUNCTUATION_OR_SPACE.test(afterChar))) { return fullMatch; } } } return null; } function importTextFormatTransformer(textNode, startIndex, endIndex, transformer, match) { const textContent = textNode.getTextContent(); // No text matches - we can safely process the text format match let transformedNode, nodeAfter, nodeBefore; // If matching full 
content there's no need to run splitText and can reuse existing textNode // to update its content and apply format. E.g. for **_Hello_** string after applying bold // format (**) it will reuse the same text node to apply italic (_) if (match[0] === textContent) { transformedNode = textNode; } else { if (startIndex === 0) { [transformedNode, nodeAfter] = textNode.splitText(endIndex); } else { [nodeBefore, transformedNode, nodeAfter] = textNode.splitText(startIndex, endIndex); } } transformedNode.setTextContent(match[2]); if (transformer) { for (const format of transformer.format) { if (!transformedNode.hasFormat(format)) { transformedNode.toggleFormat(format); } } } return { nodeAfter: nodeAfter, nodeBefore: nodeBefore, transformedNode: transformedNode }; } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ function findOutermostTextMatchTransformer(textNode_, textMatchTransformers) { const textNode = textNode_; let foundMatchStartIndex = undefined; let foundMatchEndIndex = undefined; let foundMatchTransformer = undefined; let foundMatch = undefined; for (const transformer of textMatchTransformers) { if (!transformer.replace || !transformer.importRegExp) { continue; } const match = textNode.getTextContent().match(transformer.importRegExp); if (!match) { continue; } const startIndex = match.index || 0; const endIndex = transformer.getEndIndex ? transformer.getEndIndex(textNode, match) : startIndex + match[0].length; if (endIndex === false) { continue; } if (foundMatchStartIndex === undefined || foundMatchEndIndex === undefined || // Wraps previous match or is strictly before it. 
startIndex < foundMatchStartIndex && (endIndex > foundMatchEndIndex || endIndex <= foundMatchStartIndex)) { foundMatchStartIndex = startIndex; foundMatchEndIndex = endIndex; foundMatchTransformer = transformer; foundMatch = match; } } if (foundMatchStartIndex === undefined || foundMatchEndIndex === undefined || foundMatchTransformer === undefined || foundMatch === undefined) { return null; } return { endIndex: foundMatchEndIndex, match: foundMatch, startIndex: foundMatchStartIndex, transformer: foundMatchTransformer }; } function importFoundTextMatchTransformer(textNode, startIndex, endIndex, transformer, match) { let transformedNode, nodeAfter, nodeBefore; if (startIndex === 0) { [transformedNode, nodeAfter] = textNode.splitText(endIndex); } else { [nodeBefore, transformedNode, nodeAfter] = textNode.splitText(startIndex, endIndex); } if (!transformer.replace) { return null; } const potentialTransformedNode = transformer.replace(transformedNode, match); return { nodeAfter, nodeBefore, transformedNode: potentialTransformedNode || undefined }; } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ /** * Returns true if the node can contain transformable markdown. * Code nodes cannot contain transformable markdown. * For example, `code **bold**` should not be transformed to * <code>code <strong>bold</strong></code>. */ function canContainTransformableMarkdown(node) { return $isTextNode(node) && !node.hasFormat('code'); } /** * Handles applying both text format and text match transformers. * It finds the outermost text format or text match and applies it, * then recursively calls itself to apply the next outermost transformer, * until there are no more transformers to apply. 
*/ function importTextTransformers(textNode, textFormatTransformersIndex, textMatchTransformers) { let foundTextFormat = findOutermostTextFormatTransformer(textNode, textFormatTransformersIndex); let foundTextMatch = findOutermostTextMatchTransformer(textNode, textMatchTransformers); if (foundTextFormat && foundTextMatch) { // Find the outermost transformer if (foundTextFormat.startIndex <= foundTextMatch.startIndex && foundTextFormat.endIndex >= foundTextMatch.endIndex || // foundTextMatch is not contained within foundTextFormat foundTextMatch.startIndex > foundTextFormat.endIndex) { // foundTextFormat wraps foundTextMatch - apply foundTextFormat by setting foundTextMatch to null foundTextMatch = null; } else { // foundTextMatch wraps foundTextFormat - apply foundTextMatch by setting foundTextFormat to null foundTextFormat = null; } } if (foundTextFormat) { const result = importTextFormatTransformer(textNode, foundTextFormat.startIndex, foundTextFormat.endIndex, foundTextFormat.transformer, foundTextFormat.match); if (canContainTransformableMarkdown(result.nodeAfter)) { importTextTransformers(result.nodeAfter, textFormatTransformersIndex, textMatchTransformers); } if (canContainTransformableMarkdown(result.nodeBefore)) { importTextTransformers(result.nodeBefore, textFormatTransformersIndex, textMatchTransformers); } if (canContainTransformableMarkdown(result.transformedNode)) { importTextTransformers(result.transformedNode, textFormatTransformersIndex, textMatchTransformers); } } else if (foundTextMatch) { const result = importFoundTextMatchTransformer(textNode, foundTextMatch.startIndex, foundTextMatch.endIndex, foundTextMatch.transformer, foundTextMatch.match); if (!result) { return; } if (canContainTransformableMarkdown(result.nodeAfter)) { importTextTransformers(result.nodeAfter, textFormatTransformersIndex, textMatchTransformers); } if (canContainTransformableMarkdown(result.nodeBefore)) { importTextTransformers(result.nodeBefore, textFormatTransformersIndex, 
textMatchTransformers); } if (canContainTransformableMarkdown(result.transformedNode)) { importTextTransformers(result.transformedNode, textFormatTransformersIndex, textMatchTransformers); } } // Handle escape characters const textContent = textNode.getTextContent(); const escapedText = textContent.replace(/\\([*_`~\\])/g, '$1').replace(/&#(\d+);/g, (_, codePoint) => { return String.fromCodePoint(codePoint); }); textNode.setTextContent(escapedText); } /** * Copyright (c) Meta Platforms, Inc. and affiliates. * * This source code is licensed under the MIT license found in the * LICENSE file in the root directory of this source tree. * */ /** * Renders markdown from a string. The selection is moved to the start after the operation. */ function createMarkdownImport(transformers, shouldPreserveNewLines = false) { const byType = transformersByType(transformers); const textFormatTransformersIndex = createTextFormatTransformersIndex(byType.textFormat); return (markdownString, node) => { const lines = markdownString.split('\n'); const linesLength = lines.length; const root = node || $getRoot(); root.clear(); for (let i = 0; i < linesLength; i++) { const lineText = lines[i]; const [imported, shiftedIndex] = $importMultiline(lines, i, byType.multilineElement, root); if (imported) { // If a multiline markdown element was imported, we don't want to process the lines that were part of it anymore. // There could be other sub-markdown elements (both multiline and normal ones) matching within this matched multiline element's children. // However, it would be the responsibility of the matched multiline transformer to decide how it wants to handle them. // We cannot handle those, as there is no way for us to know how to maintain the correct order of generated lexical nodes for possible children. 
i = shiftedIndex; // Next loop will start from the line after the last line of the multiline element continue; } $importBlocks(lineText, root, byType.element, textFormatTransformersIndex, byType.textMatch, shouldPreserveNewLines); } // By default, removing empty paragraphs as md does not really // allow empty lines and uses them as delimiter. // If you need empty lines set shouldPreserveNewLines = true. const children = root.getChildren(); for (const child of children) { if (!shouldPreserveNewLines && isEmptyParagraph(child) && root.getChildrenSize() > 1) { child.remove(); } } if ($getSelection() !== null) { root.selectStart(); } }; } /** * * @returns first element of the returned tuple is a boolean indicating if a multiline element was imported. The second element is the index of the last line that was processed. */ function $importMultiline(lines, startLineIndex, multilineElementTransformers, rootNode) { for (const transformer of multilineElementTransformers) { const { handleImportAfterStartMatch, regExpEnd, regExpStart, replace } = transformer; const startMatch = lines[startLineIndex].match(regExpStart); if (!startMatch) { continue; // Try next transformer } if (handleImportAfterStartMatch) { const result = handleImportAfterStartMatch({ lines, rootNode, startLineIndex, startMatch, transformer }); if (result === null) { continue; } else if (result) { return result; } } const regexpEndRegex = typeof regExpEnd === 'object' && 'regExp' in regExpEnd ? regExpEnd.regExp : regExpEnd; const isEndOptional = regExpEnd && typeof regExpEnd === 'object' && 'optional' in regExpEnd ? regExpEnd.optional : !regExpEnd; let endLineIndex = startLineIndex; const linesLength = lines.length; // check every single line for the closing match. It could also be on the same line as the opening match. while (endLineIndex < linesLength) { const endMatch = regexpEndRegex ? 
lines[endLineIndex].match(regexpEndRegex) : null; if (!endMatch) { if (!isEndOptional || isEndOptional && endLineIndex < linesLength - 1 // Optional end, but didn't reach the end of the document yet => continue searching for potential closing match ) { endLineIndex++; continue; // Search next line for closing match } } // Now, check if the closing match matched is the same as the opening match. // If it is, we need to continue searching for the actual closing match. if (endMatch && startLineIndex === endLineIndex && endMatch.index === startMatch.index) { endLineIndex++; continue; // Search next line for closing match } // At this point, we have found the closing match. Next: calculate the lines in between open and closing match // This should not include the matches themselves, and be split up by lines const linesInBetween = []; if (endMatch && startLineIndex === endLineIndex) { linesInBetween.push(lines[startLineIndex].slice(startMatch[0].length, -endMatch[0].length)); } else { for (let i = startLineIndex; i <= endLineIndex; i++) { if (i === startLineIndex) { const text = lines[i].slice(startMatch[0].length); linesInBetween.push(text); // Also include empty text } else if (i === endLineIndex && endMatch) { const text = lines[i].slice(0, -endMatch[0].length); linesInBetween.push(text); // Also include empty text } else { linesInBetween.push(lines[i]); } } } if (replace(rootNode, null, startMatch, endMatch, linesInBetween, true) !== false) { // Return here. This $importMultiline function is run line by line and should only process a single multiline element at a time. return [true, endLineIndex]; } // The replace function returned false, despite finding the matching open and close tags => this transformer does not want to handle it. 
/**
 * Imports a single line of markdown as a block appended to `rootNode`.
 *
 * The line is first wrapped into a paragraph with one text node. Element transformers are
 * then tried in order: the first one whose `regExp` matches and whose `replace` does not
 * return `false` wins. Afterwards the text node is passed to `importTextTransformers`, and a
 * paragraph that is still attached may be merged into the preceding paragraph, quote or
 * list item, separated by a line break.
 *
 * @param lineText the markdown line to import
 * @param rootNode node the new block is appended to
 * @param elementTransformers element transformers, tried in the given order
 * @param textFormatTransformersIndex forwarded to `importTextTransformers`
 * @param textMatchTransformers forwarded to `importTextTransformers`
 * @param shouldPreserveNewLines when truthy, the line is never merged into the previous block
 */
function $importBlocks(lineText, rootNode, elementTransformers, textFormatTransformersIndex, textMatchTransformers, shouldPreserveNewLines) {
  const textNode = $createTextNode(lineText);
  const elementNode = $createParagraphNode();
  elementNode.append(textNode);
  rootNode.append(elementNode);
  for (const {
    regExp,
    replace
  } of elementTransformers) {
    const match = lineText.match(regExp);
    if (match) {
      // The transformer receives the text without the matched markdown prefix
      textNode.setTextContent(lineText.slice(match[0].length));
      if (replace(elementNode, [textNode], match, true) !== false) {
        break;
      }
      // The transformer declined: put the original text back so the prefix is not lost
      // if no other transformer accepts the line.
      textNode.setTextContent(lineText);
    }
  }
  importTextTransformers(textNode, textFormatTransformersIndex, textMatchTransformers);

  // If no transformer found and we left with original paragraph node
  // can check if its content can be appended to the previous node
  // if it's a paragraph, quote or list
  if (elementNode.isAttached() && lineText.length > 0) {
    const previousNode = elementNode.getPreviousSibling();
    if (!shouldPreserveNewLines && (
    // Only append if we're not preserving newlines
    $isParagraphNode(previousNode) || $isQuoteNode(previousNode) || $isListNode(previousNode))) {
      let targetNode = previousNode;
      if ($isListNode(previousNode)) {
        // For lists the text continues the list item that holds the last descendant
        const lastDescendant = previousNode.getLastDescendant();
        if (lastDescendant == null) {
          targetNode = null;
        } else {
          targetNode = $findMatchingParent(lastDescendant, $isListItemNode);
        }
      }
      // Only merge into a block that already has text
      if (targetNode != null && targetNode.getTextContentSize() > 0) {
        targetNode.splice(targetNode.getChildrenSize(), 0, [$createLineBreakNode(), ...elementNode.getChildren()]);
        elementNode.remove();
      }
    }
  }
}
/**
 * Builds the lookup structures used to find text-format tags (e.g. `**`, `_`, `` ` ``) in a
 * string.
 *
 * @param textTransformers text-format transformers; only their `tag` is read here
 * @returns an object with
 *  - `fullMatchRegExpByTag`: per tag, a RegExp matching "open tag + content + close tag"
 *  - `openTagsRegExp`: a global RegExp matching any tag that is not preceded by a backslash
 *  - `transformersByTag`: per tag, the transformer that declared it (the last one wins)
 */
function createTextFormatTransformersIndex(textTransformers) {
  const transformersByTag = {};
  const fullMatchRegExpByTag = {};
  const openTagsRegExp = [];
  const escapeRegExp = `(?<![\\\\])`;
  for (const transformer of textTransformers) {
    const {
      tag
    } = transformer;
    transformersByTag[tag] = transformer;
    // Escape every RegExp metacharacter so that any tag (e.g. "$" or "||") is matched
    // literally once it is embedded in the patterns below.
    const tagRegExp = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    openTagsRegExp.push(tagRegExp);

    // Single-char tag (e.g. "*"),
    if (tag.length === 1) {
      if (tag === '`') {
        // Special handling for backticks - match content with escaped backticks
        fullMatchRegExpByTag[tag] = new RegExp(`(?<![\\\\\`])(\`)((?:\\\\\`|[^\`])+?)(\`)(?!\`)`);
      } else {
        fullMatchRegExpByTag[tag] = new RegExp(`(?<![\\\\${tagRegExp}])(${tagRegExp})((\\\\${tagRegExp})?.*?[^${tagRegExp}\\s](\\\\${tagRegExp})?)((?<!\\\\)|(?<=\\\\\\\\))(${tagRegExp})(?![\\\\${tagRegExp}])`);
      }
    } else {
      // Multi‐char tags (e.g. "**")
      fullMatchRegExpByTag[tag] = new RegExp(`(?<!\\\\)(${tagRegExp})((\\\\${tagRegExp})?.*?[^\\s](\\\\${tagRegExp})?)((?<!\\\\)|(?<=\\\\\\\\))(${tagRegExp})(?!\\\\)`);
    }
  }
  return {
    // Reg exp to find open tag + content + close tag
    fullMatchRegExpByTag,
    // Regexp to locate *any* potential opening tag. The alternatives are listed in the order
    // of `textTransformers`; this function does not sort them by length.
    openTagsRegExp: new RegExp(`${escapeRegExp}(${openTagsRegExp.join('|')})`, 'g'),
    transformersByTag
  };
}

// Do not require this module directly! Use normal `invariant` calls.

/**
 * Throws an `Error` carrying the given message.
 *
 * @param message text of the error
 */
function formatDevErrorMessage(message) {
  throw new Error(message);
}
// Line-level markdown patterns.
// Group 1 is the leading whitespace, group 2 the item number.
const ORDERED_LIST_REGEX = /^(\s*)(\d{1,})\.\s/;
// Group 1 is the leading whitespace.
const UNORDERED_LIST_REGEX = /^(\s*)[-*+]\s/;
// Group 1 is the leading whitespace, group 2 the whole checkbox, group 3 its content
// (whitespace or "x", matched case-insensitively).
const CHECK_LIST_REGEX = /^(\s*)(?:[-*+]\s)?\s?(\[(\s|x)?\])\s/i;
// Group 1 is the run of "#" characters.
const HEADING_REGEX = /^(#{1,6})\s/;
const QUOTE_REGEX = /^>\s/;
// Group 1 is the optional word directly after the opening backticks.
const CODE_START_REGEX = /^[ \t]*```([\w-]+)?/;
const CODE_END_REGEX = /[ \t]*```$/;
const CODE_SINGLE_LINE_REGEX = /^[ \t]*```[^`]+(?:(?:`{1,2}|`{4,})[^`]+)*```(?:[^`]|$)/;
const TABLE_ROW_REG_EXP = /^(?:\|)(.+)(?:\|)\s?$/;
const TABLE_ROW_DIVIDER_REG_EXP = /^(\| ?:?-*:? ?)+\|\s?$/;
const TAG_START_REGEX = /^<[a-z_][\w-]*(?:\s[^<>]*)?\/?>/i;
const TAG_END_REGEX = /^<\/[a-z_][\w-]*\s*>/i;

// Wraps the source of `regex` in a group followed by `$`, keeping its flags.
const ENDS_WITH = regex => new RegExp(`(?:${regex.source})$`, regex.flags);

// State holding the bullet character of a list; anything that is not "-", "*" or "+"
// is parsed as "-".
const listMarkerState = createState('mdListMarker', {
  parse: v => {
    const isListMarker = typeof v === 'string' && /^[-*+]$/.test(v);
    return isListMarker ? v : '-';
  }
});

/**
 * Creates a `replace` callback that swaps `parentNode` for the node returned by
 * `createNode(match)`, moving `children` into it. Outside of an import the new node is
 * selected at offset 0.
 */
const createBlockNode = createNode => (parentNode, children, match, isImport) => {
  const blockNode = createNode(match);
  blockNode.append(...children);
  parentNode.replace(blockNode);
  if (isImport) {
    return;
  }
  blockNode.select(0, 0);
};

// Amount of spaces that define indentation level
// TODO: should be an option
const LIST_INDENT_SIZE = 4;

/**
 * Computes the indentation level of a whitespace prefix: every tab counts as one level and
 * every full group of `LIST_INDENT_SIZE` spaces counts as one level.
 */
function getIndent(whitespaces) {
  const countOf = pattern => (whitespaces.match(pattern) || []).length;
  const tabCount = countOf(/\t/g);
  const spaceCount = countOf(/ /g);
  return tabCount + Math.floor(spaceCount / LIST_INDENT_SIZE);
}
/**
 * Creates the `replace` callback of a list transformer for the given list type
 * ('bullet', 'check' or 'number' are the values compared against below).
 *
 * The callback turns `parentNode` into a list item. The item is inserted at the front of a
 * following list of the same type, or appended to a preceding list of the same type, or
 * wrapped into a newly created list that replaces `parentNode`.
 */
const listReplace = listType => {
  return (parentNode, children, match, isImport) => {
    const previousNode = parentNode.getPreviousSibling();
    const nextNode = parentNode.getNextSibling();
    // CHECK_LIST_REGEX is case-insensitive, so the checkbox content can be "x" or "X";
    // both mean "checked". The group is undefined for an empty checkbox ("[]").
    const isChecked = typeof match[3] === 'string' && match[3].toLowerCase() === 'x';
    const listItem = $createListItemNode(listType === 'check' ? isChecked : undefined);
    // Remember the bullet character only if it is one that listMarkerState accepts as-is
    const firstMatchChar = match[0].trim()[0];
    const listMarker = (listType === 'bullet' || listType === 'check') && firstMatchChar === listMarkerState.parse(firstMatchChar) ? firstMatchChar : undefined;
    if ($isListNode(nextNode) && nextNode.getListType() === listType) {
      if (listMarker) {
        $setState(nextNode, listMarkerState, listMarker);
      }
      const firstChild = nextNode.getFirstChild();
      if (firstChild !== null) {
        firstChild.insertBefore(listItem);
      } else {
        // should never happen, but let's handle gracefully, just in case.
        nextNode.append(listItem);
      }
      parentNode.remove();
    } else if ($isListNode(previousNode) && previousNode.getListType() === listType) {
      if (listMarker) {
        $setState(previousNode, listMarkerState, listMarker);
      }
      previousNode.append(listItem);
      parentNode.remove();
    } else {
      // Numbered lists start at the number that was typed (group 2 of the match)
      const list = $createListNode(listType, listType === 'number' ? Number(match[2]) : undefined);
      if (listMarker) {
        $setState(list, listMarkerState, listMarker);
      }
      list.append(listItem);
      parentNode.replace(list);
    }
    listItem.append(...children);
    if (!isImport) {
      listItem.select(0, 0);
    }
    // Group 1 of the match is the leading whitespace of the line
    const indent = getIndent(match[1]);
    if (indent) {
      listItem.setIndent(indent);
    }
  };
};

/**
 * Renders a list node as markdown, one line per list item.
 *
 * A list item whose only child is a list is rendered as that nested list, one level
 * deeper, and does not advance the item numbering.
 *
 * @param listNode the list to export
 * @param exportChildren callback rendering the content of a list item
 * @param depth nesting level; each level indents by `LIST_INDENT_SIZE` spaces
 * @returns the rendered lines joined with "\n"
 */
const $listExport = (listNode, exportChildren, depth) => {
  const output = [];
  const children = listNode.getChildren();
  let index = 0;
  for (const listItemNode of children) {
    if ($isListItemNode(listItemNode)) {
      if (listItemNode.getChildrenSize() === 1) {
        const firstChild = listItemNode.getFirstChild();
        if ($isListNode(firstChild)) {
          output.push($listExport(firstChild, exportChildren, depth + 1));
          continue;
        }
      }
      const indent = ' '.repeat(depth * LIST_INDENT_SIZE);
      const listType = listNode.getListType();
      const listMarker = $getState(listNode, listMarkerState);
      const prefix = listType === 'number' ? `${listNode.getStart() + index}. ` : listType === 'check' ? `${listMarker} [${listItemNode.getChecked() ? 'x' : ' '}] ` : listMarker + ' ';
      output.push(indent + prefix + exportChildren(listItemNode));
      index++;
    }
  }
  return output.join('\n');
};
/**
 * Element transformer for headings ("# " up to "###### ").
 */
const HEADING = {
  dependencies: [HeadingNode],
  // Renders a heading as one "#" per level, a space and the exported children.
  export: (node, exportChildren) => {
    if ($isHeadingNode(node)) {
      // The level is the number following the first character of the tag
      const hashes = '#'.repeat(Number(node.getTag().slice(1)));
      return hashes + ' ' + exportChildren(node);
    }
    return null;
  },
  regExp: HEADING_REGEX,
  // The heading tag is "h" followed by the amount of "#" characters that were matched.
  replace: createBlockNode(match => $createHeadingNode('h' + match[1].length)),
  type: 'element'
};

/**
 * Element transformer for block quotes ("> ").
 */
const QUOTE = {
  dependencies: [QuoteNode],
  // Prefixes every exported line of the quote with "> ".
  export: (node, exportChildren) => {
    if ($isQuoteNode(node)) {
      return exportChildren(node).split('\n').map(line => '> ' + line).join('\n');
    }
    return null;
  },
  regExp: QUOTE_REGEX,
  replace: (parentNode, children, _match, isImport) => {
    // During import, a quote line directly after a quote continues that quote on a new line
    const previousNode = isImport ? parentNode.getPreviousSibling() : null;
    if (isImport && $isQuoteNode(previousNode)) {
      previousNode.splice(previousNode.getChildrenSize(), 0, [$createLineBreakNode(), ...children]);
      parentNode.remove();
      return;
    }
    const quoteNode = $createQuoteNode();
    quoteNode.append(...children);
    parentNode.replace(quoteNode);
    if (isImport) {
      return;
    }
    quoteNode.select(0, 0);
  },
  type: 'element'
};
/**
 * Multiline element transformer for fenced code blocks (three backticks).
 */
const CODE = {
  dependencies: [CodeNode],
  // Renders the code node as an opening fence followed by the language (if any), the text
  // content on its own lines (if any), and a closing fence.
  export: node => {
    if (!$isCodeNode(node)) {
      return null;
    }
    const textContent = node.getTextContent();
    return '```' + (node.getLanguage() || '') + (textContent ? '\n' + textContent : '') + '\n' + '```';
  },
  regExpEnd: {
    optional: true,
    regExp: CODE_END_REGEX
  },
  regExpStart: CODE_START_REGEX,
  // Called with `children === null` and `linesInBetween` set (the multiline import in this
  // file calls it that way), or with `children` set (the shortcut runner calls it that way).
  replace: (rootNode, children, startMatch, endMatch, linesInBetween, isImport) => {
    let codeBlockNode;
    let code;
    if (!children && linesInBetween) {
      if (linesInBetween.length === 1) {
        // Single-line code blocks
        if (endMatch) {
          // End match on same line. Example: ```markdown hello```. markdown should not be considered the language here.
          codeBlockNode = $createCodeNode();
          // Group 1 is optional (e.g. "``` hello```"); without the fallback the literal
          // text "undefined" would be prepended to the code.
          code = (startMatch[1] || '') + linesInBetween[0];
        } else {
          // No end match. We should assume the language is next to the backticks and that code will be typed on the next line in the future
          codeBlockNode = $createCodeNode(startMatch[1]);
          code = linesInBetween[0].startsWith(' ') ? linesInBetween[0].slice(1) : linesInBetween[0];
        }
      } else {
        // Treat multi-line code blocks as if they always have an end match
        codeBlockNode = $createCodeNode(startMatch[1]);
        if (linesInBetween[0].trim().length === 0) {
          // Filter out all start and end lines that are length 0 until we find the first line with content
          while (linesInBetween.length > 0 && !linesInBetween[0].length) {
            linesInBetween.shift();
          }
        } else {
          // The first line already has content => Remove the first space of the line if it exists
          linesInBetween[0] = linesInBetween[0].startsWith(' ') ? linesInBetween[0].slice(1) : linesInBetween[0];
        }

        // Filter out all end lines that are length 0 until we find the last line with content
        while (linesInBetween.length > 0 && !linesInBetween[linesInBetween.length - 1].length) {
          linesInBetween.pop();
        }
        code = linesInBetween.join('\n');
      }
      const textNode = $createTextNode(code);
      codeBlockNode.append(textNode);
      rootNode.append(codeBlockNode);
    } else if (children) {
      createBlockNode(match => {
        return $createCodeNode(match ? match[1] : undefined);
      })(rootNode, children, startMatch, isImport);
    }
  },
  type: 'multiline-element'
};

/**
 * Element transformer for bullet lists ("- ", "* " or "+ ").
 */
const UNORDERED_LIST = {
  dependencies: [ListNode, ListItemNode],
  export: (node, exportChildren) => {
    return $isListNode(node) ? $listExport(node, exportChildren, 0) : null;
  },
  regExp: UNORDERED_LIST_REGEX,
  replace: listReplace('bullet'),
  type: 'element'
};
/**
 * Element transformer for check lists (a checkbox such as "[ ]" or "[x]", optionally after
 * a bullet marker).
 */
const CHECK_LIST = {
  dependencies: [ListNode, ListItemNode],
  export: (node, exportChildren) => $isListNode(node) ? $listExport(node, exportChildren, 0) : null,
  regExp: CHECK_LIST_REGEX,
  replace: listReplace('check'),
  type: 'element'
};

/**
 * Element transformer for numbered lists ("1. ").
 */
const ORDERED_LIST = {
  dependencies: [ListNode, ListItemNode],
  export: (node, exportChildren) => $isListNode(node) ? $listExport(node, exportChildren, 0) : null,
  regExp: ORDERED_LIST_REGEX,
  replace: listReplace('number'),
  type: 'element'
};

// Text-format transformers: `tag` is the markdown delimiter and `format` lists the text
// formats it stands for. `intraword: false` marks tags that are not applied inside words.
const INLINE_CODE = {
  format: ['code'],
  tag: '`',
  type: 'text-format'
};
const HIGHLIGHT = {
  format: ['highlight'],
  tag: '==',
  type: 'text-format'
};
const BOLD_ITALIC_STAR = {
  format: ['bold', 'italic'],
  tag: '***',
  type: 'text-format'
};
const BOLD_ITALIC_UNDERSCORE = {
  format: ['bold', 'italic'],
  intraword: false,
  tag: '___',
  type: 'text-format'
};
const BOLD_STAR = {
  format: ['bold'],
  tag: '**',
  type: 'text-format'
};
const BOLD_UNDERSCORE = {
  format: ['bold'],
  intraword: false,
  tag: '__',
  type: 'text-format'
};
const STRIKETHROUGH = {
  format: ['strikethrough'],
  tag: '~~',
  type: 'text-format'
};
const ITALIC_STAR = {
  format: ['italic'],
  tag: '*',
  type: 'text-format'
};
const ITALIC_UNDERSCORE = {
  format: ['italic'],
  intraword: false,
  tag: '_',
  type: 'text-format'
};

// Order of text transformers matters:
//
// - code should go first as it prevents any transformations inside
// - then longer tags match (e.g. ** or __ should go before * or _)
/**
 * Text-match transformer for inline links: `[text](url)` and `[text](url "title")`.
 */
const LINK = {
  dependencies: [LinkNode],
  // Renders link nodes; auto links and other nodes are left to other transformers.
  export: (node, exportChildren, exportFormat) => {
    if (!$isLinkNode(node) || $isAutoLinkNode(node)) {
      return null;
    }
    const title = node.getTitle();
    const label = exportChildren(node);
    const target = title ? `${node.getURL()} "${title}"` : `${node.getURL()}`;
    return `[${label}](${target})`;
  },
  importRegExp: /(?:\[(.+?)\])(?:\((?:([^()\s]+)(?:\s"((?:[^"]*\\")*[^"]*)"\s*)?)\))/,
  regExp: /(?:\[(.+?)\])(?:\((?:([^()\s]+)(?:\s"((?:[^"]*\\")*[^"]*)"\s*)?)\))$/,
  // Replaces `textNode` with a link node built from the match (text, url, optional title).
  // Returns the text node placed inside the link, or undefined when nothing was replaced.
  replace: (textNode, match) => {
    const [, linkText, linkUrl, linkTitle] = match;
    const linkNode = $createLinkNode(linkUrl, {
      title: linkTitle
    });
    const countOf = bracket => linkText.split(bracket).length - 1;
    const openBracketAmount = countOf('[');
    const closeBracketAmount = countOf(']');
    // More closing than opening brackets inside the text: leave the text untouched
    if (openBracketAmount < closeBracketAmount) {
      return;
    }
    let parsedLinkText = linkText;
    let outsideLinkText = '';
    if (openBracketAmount > closeBracketAmount) {
      // An unbalanced "[" means the match started too early: everything up to the first
      // "[" inside the text stays outside of the link.
      const [beforeFirstBracket, ...afterFirstBracket] = linkText.split('[');
      outsideLinkText = '[' + beforeFirstBracket;
      parsedLinkText = afterFirstBracket.join('[');
    }
    const linkTextNode = $createTextNode(parsedLinkText);
    linkTextNode.setFormat(textNode.getFormat());
    linkNode.append(linkTextNode);
    textNode.replace(linkNode);
    if (outsideLinkText) {
      linkNode.insertBefore($createTextNode(outsideLinkText));
    }
    return linkTextNode;
  },
  trigger: ')',
  type: 'text-match'
};

// Default element transformers (CHECK_LIST is not part of this list).
const ELEMENT_TRANSFORMERS = [HEADING, QUOTE, UNORDERED_LIST, ORDERED_LIST];
const MULTILINE_ELEMENT_TRANSFORMERS = [CODE];
// Order of text format transformers matters:
//
// - code should go first as it prevents any transformations inside
// - then longer tags match (e.g. ** or __ should go before * or _)
const TEXT_FORMAT_TRANSFORMERS = [INLINE_CODE, BOLD_ITALIC_STAR, BOLD_ITALIC_UNDERSCORE, BOLD_STAR, BOLD_UNDERSCORE, HIGHLIGHT, ITALIC_STAR, ITALIC_UNDERSCORE, STRIKETHROUGH];
const TEXT_MATCH_TRANSFORMERS = [LINK];
const TRANSFORMERS = [...ELEMENT_TRANSFORMERS, ...MULTILINE_ELEMENT_TRANSFORMERS, ...TEXT_FORMAT_TRANSFORMERS, ...TEXT_MATCH_TRANSFORMERS];

/**
 * Normalizes a markdown string line by line: trailing whitespace is removed from every
 * line and, when `shouldMergeAdjacentLines` is true, a non-empty line that directly follows
 * another non-empty line is merged into it (separated by one space), unless one of the
 * two lines is a block construct checked below. Lines between code fences are never merged.
 *
 * @param input markdown text
 * @param shouldMergeAdjacentLines whether adjacent text lines are merged (default false)
 * @returns the normalized markdown text
 */
function normalizeMarkdown(input, shouldMergeAdjacentLines = false) {
  // A line matching any of these always starts on its own line
  const ownLinePatterns = [HEADING_REGEX, QUOTE_REGEX, ORDERED_LIST_REGEX, UNORDERED_LIST_REGEX, CHECK_LIST_REGEX, TABLE_ROW_REG_EXP, TABLE_ROW_DIVIDER_REG_EXP, TAG_START_REGEX, TAG_END_REGEX];
  // Nothing is merged into a previous line for which this holds
  const closesBlock = previous => HEADING_REGEX.test(previous) || ENDS_WITH(TAG_END_REGEX).test(previous) || ENDS_WITH(TAG_START_REGEX).test(previous) || CODE_END_REGEX.test(previous);
  const sanitizedLines = [];
  let inCodeBlock = false;
  for (const rawLine of input.split('\n')) {
    const line = rawLine.trimEnd();
    const lastLine = sanitizedLines[sanitizedLines.length - 1];

    // Code blocks of ```single line``` don't toggle the inCodeBlock flag
    const isSingleLineCode = CODE_SINGLE_LINE_REGEX.test(line);
    // Detect the start or end of a code block
    const isFence = !isSingleLineCode && (CODE_START_REGEX.test(line) || CODE_END_REGEX.test(line));
    if (isFence) {
      inCodeBlock = !inCodeBlock;
    }
    // Fences and everything inside a code block are kept as separate lines
    if (isSingleLineCode || isFence || inCodeBlock) {
      sanitizedLines.push(line);
      continue;
    }

    // In markdown the concept of "empty paragraphs" does not exist.
    // Blocks must be separated by an empty line. Non-empty adjacent lines must be merged.
    const keepsOwnLine = !shouldMergeAdjacentLines || line === '' || !lastLine || ownLinePatterns.some(pattern => pattern.test(line)) || closesBlock(lastLine);
    if (keepsOwnLine) {
      sanitizedLines.push(line);
    } else {
      sanitizedLines[sanitizedLines.length - 1] = lastLine + ' ' + line.trimStart();
    }
  }
  return sanitizedLines.join('\n');
}
/**
 * Applies the first matching element transformer to the text typed at the start of a block.
 *
 * Nothing happens unless `parentNode` is a direct child of a root or shadow root,
 * `anchorNode` is its first child, and the character before the caret is a space.
 *
 * @param parentNode block containing the caret
 * @param anchorNode text node containing the caret
 * @param anchorOffset caret offset inside `anchorNode`
 * @param elementTransformers transformers to try, in the given order
 * @returns true if a transformer replaced the block, false otherwise
 */
function runElementTransformers(parentNode, anchorNode, anchorOffset, elementTransformers) {
  const isTopLevelBlock = $isRootOrShadowRoot(parentNode.getParent());
  if (!isTopLevelBlock || parentNode.getFirstChild() !== anchorNode) {
    return false;
  }
  const textContent = anchorNode.getTextContent();

  // Checking for anchorOffset position to prevent any checks for cases when caret is too far
  // from a line start to be a part of block-level markdown trigger.
  //
  // TODO:
  // Can have a quick check if caret is close enough to the beginning of the string (e.g. offset less than 10-20)
  // since otherwise it won't be a markdown shortcut, but tables are exception
  if (textContent[anchorOffset - 1] !== ' ') {
    return false;
  }
  for (const {
    regExp,
    replace
  } of elementTransformers) {
    const match = textContent.match(regExp);
    if (!match) {
      continue;
    }
    // The match has to end at the caret, or right before the space that was just typed
    const expectedLength = match[0].endsWith(' ') ? anchorOffset : anchorOffset - 1;
    if (match[0].length !== expectedLength) {
      continue;
    }
    const nextSiblings = anchorNode.getNextSiblings();
    const [leadingNode, remainderNode] = anchorNode.splitText(anchorOffset);
    const siblings = remainderNode ? [remainderNode, ...nextSiblings] : nextSiblings;
    if (replace(parentNode, siblings, match, false) === false) {
      continue;
    }
    // The text before the caret was the markdown trigger: drop it
    leadingNode.remove();
    return true;
  }
  return false;
}
/**
 * Applies the first matching multiline element transformer to the text typed at the start
 * of a block. Only transformers without an end pattern, or with an end pattern marked as
 * optional, are considered.
 *
 * Nothing happens unless `parentNode` is a direct child of a root or shadow root,
 * `anchorNode` is its first child, and the character before the caret is a space.
 *
 * @param parentNode block containing the caret
 * @param anchorNode text node containing the caret
 * @param anchorOffset caret offset inside `anchorNode`
 * @param elementTransformers multiline transformers to try, in the given order
 * @returns true if a transformer replaced the block, false otherwise
 */
function runMultilineElementTransformers(parentNode, anchorNode, anchorOffset, elementTransformers) {
  const isTopLevelBlock = $isRootOrShadowRoot(parentNode.getParent());
  if (!isTopLevelBlock || parentNode.getFirstChild() !== anchorNode) {
    return false;
  }
  const textContent = anchorNode.getTextContent();

  // Checking for anchorOffset position to prevent any checks for cases when caret is too far
  // from a line start to be a part of block-level markdown trigger.
  //
  // TODO:
  // Can have a quick check if caret is close enough to the beginning of the string (e.g. offset less than 10-20)
  // since otherwise it won't be a markdown shortcut, but tables are exception
  if (textContent[anchorOffset - 1] !== ' ') {
    return false;
  }
  for (const {
    regExpStart,
    replace,
    regExpEnd
  } of elementTransformers) {
    // Skip transformers whose end pattern is present and not declared optional
    const hasMandatoryEnd = regExpEnd && (!('optional' in regExpEnd) || !regExpEnd.optional);
    if (hasMandatoryEnd) {
      continue;
    }
    const match = textContent.match(regExpStart);
    if (!match) {
      continue;
    }
    // The match has to end at the caret, or right before the space that was just typed
    const expectedLength = match[0].endsWith(' ') ? anchorOffset : anchorOffset - 1;
    if (match[0].length !== expectedLength) {
      continue;
    }
    const nextSiblings = anchorNode.getNextSiblings();
    const [leadingNode, remainderNode] = anchorNode.splitText(anchorOffset);
    const siblings = remainderNode ? [remainderNode, ...nextSiblings] : nextSiblings;
    if (replace(parentNode, siblings, match, null, null, false) === false) {
      continue;
    }
    // The text before the caret was the markdown trigger: drop it
    leadingNode.remove();
    return true;
  }
  return false;
}
/**
 * Applies the first text-match transformer that is registered for the character before the
 * caret and whose `regExp` matches the text up to the caret.
 *
 * The matched range is split off `anchorNode`, `selectNext(0, 0)` is called on the split
 * node and the node is handed to the transformer's `replace` together with the match.
 *
 * @param anchorNode text node containing the caret
 * @param anchorOffset caret offset inside `anchorNode`
 * @param transformersByTrigger transformers keyed by their trigger character
 * @returns true if a transformer was applied, false otherwise
 */
function runTextMatchTransformers(anchorNode, anchorOffset, transformersByTrigger) {
  const fullText = anchorNode.getTextContent();
  const candidates = transformersByTrigger[fullText[anchorOffset - 1]];
  if (candidates == null) {
    return false;
  }

  // If typing in the middle of content, remove the tail to do
  // reg exp match up to a string end (caret position)
  const textContent = anchorOffset < fullText.length ? fullText.slice(0, anchorOffset) : fullText;
  for (const transformer of candidates) {
    if (!transformer.replace || !transformer.regExp) {
      continue;
    }
    const match = textContent.match(transformer.regExp);
    if (match === null) {
      continue;
    }
    const startIndex = match.index || 0;
    const endIndex = startIndex + match[0].length;
    // A match at offset 0 is the first split part, otherwise it is the second one
    const matchesFromStart = startIndex === 0;
    const parts = matchesFromStart ? anchorNode.splitText(endIndex) : anchorNode.splitText(startIndex, endIndex);
    const replaceNode = parts[matchesFromStart ? 0 : 1];
    replaceNode.selectNext(0, 0);
    transformer.replace(replaceNode, match);
    return true;
  }
  return false;
}
(textContent[closeTagStartIndex - 1] === ' ') { continue; } // Some tags can not be used within words, hence should have newline/space/punctuation after it const afterCloseTagChar = textContent[closeTagEndIndex + 1]; if (matcher.intraword === false && afterCloseTagChar && !PUNCTUATION_OR_SPACE.test(afterCloseTagChar)) { continue; } const closeNode = anchorNode; let openNode = closeNode; let openTagStartIndex = getOpenTagStartIndex(textContent, closeTagStartIndex, tag); // Go through text node siblings and search for opening tag // if haven't found it within the same text node as closing tag let sibling = openNode; while (openTagStartIndex < 0 && (sibling = sibling.getPreviousSibling())) { if ($isLineBreakNode(sibling)) { break; } if ($isTextNode(sibling)) { if (sibling.hasFormat('code')) { continue; } const siblingTextContent = sibling.getTextContent(); openNode = sibling; openTagStartIndex = getOpenTagStartIndex(siblingTextContent, siblingTextContent.length, tag); } } // Opening tag is not found if (openTagStartIndex < 0) { continue; } // No content between opening and closing tag if (openNode === closeNode && openTagStartIndex + tagLength === closeTagStartIndex) { continue; } // Checking longer tags for repeating chars (e.g. *** vs **) const prevOpenNodeText = openNode.getTextContent(); if (openTagStartIndex > 0 && prevOpenNodeText[openTagStartIndex - 1] === closeChar) { continue; } // Some tags can not be used within words, hence should have newline/space/punctuation before it const beforeOpenTagChar = prevOpenNod