UNPKG

@atlaskit/editor-common

Version:

A package that contains common classes and components for editor and renderer

179 lines (175 loc) 6.92 kB
import { defaultSchema } from '@atlaskit/adf-schema/schema-default';
import { DOMParser } from '@atlaskit/editor-prosemirror/model';

// Parser derived from the default schema. Only its `rules` list is consulted
// below (to map HTML tag names to node / mark names); it never parses a DOM.
const domParser = DOMParser.fromSchema(defaultSchema);

/**
 * Builds a `parentNodeType -> allowedChildNodeTypes[]` lookup from the content
 * expressions of every node in the default schema.
 *
 * Identifiers are extracted from each node's `spec.content` string; an
 * identifier is kept only if it is not one of the listed keywords, names a
 * node that exists in the schema, and has not already been seen for that
 * parent. Nodes with no content expression, or with no concrete child node
 * names, get no entry.
 *
 * @returns {Object<string, string[]>} Map of node type name to the node type names it may contain.
 */
export const getNestingRulesFromSchema = () => {
  const KEYWORDS = ['inline', 'block', 'text', 'leaf', 'group', 'unsupportedBlock', 'unsupportedInline'];
  const rules = {};
  for (const nodeType of Object.keys(defaultSchema.nodes)) {
    const node = defaultSchema.nodes[nodeType];
    const contentStr = node == null ? undefined : node.spec.content;
    if (!contentStr) {
      continue;
    }
    // eslint-disable-next-line require-unicode-regexp
    const identifiers = String(contentStr).match(/\b([a-zA-Z_][a-zA-Z0-9_]*)\b/g) || [];
    const allowedChildren = identifiers.filter(
      (match, index, arr) => !KEYWORDS.includes(match) && defaultSchema.nodes[match] && arr.indexOf(match) === index,
    );
    if (allowedChildren.length > 0) {
      rules[nodeType] = allowedChildren;
    }
  }
  return rules;
};

// Computed once at module load.
const NESTING_RULES = getNestingRulesFromSchema();

/** True when the nesting rules list at least one concrete child node type for `nodeType`. */
const canContainChildren = nodeType => {
  const children = NESTING_RULES[nodeType];
  return !!(children != null && children.length);
};

/** True when the nesting rules list `child` as an allowed child of `parent`. */
const isAllowedChild = (parent, child) => {
  const children = NESTING_RULES[parent];
  return children == null ? false : children.includes(child);
};

/** True when a parse rule for exactly this tag string is a mark rule. */
const shouldApplyMark = tag => domParser.rules.some(rule => 'mark' in rule && rule.tag === tag);

/** True when a parse rule for exactly this tag string produces a node. */
const isBlockElement = tag => domParser.rules.some(rule => rule.tag === tag && rule.node);

// Only these block types receive inline content directly in this parser.
const isTextBlock = blockType => blockType === 'paragraph' || blockType === 'codeBlock';

/**
 * Converts a list of tag names into de-duplicated ADF mark objects.
 *
 * The lookup only considers rules that actually carry a mark, so it agrees
 * with `shouldApplyMark` even if a node rule for the same tag comes first.
 *
 * @param {string[]} tags - Tag names collected while descending through elements.
 * @returns {{type: string}[]} One mark object per distinct mark type, in first-seen order.
 */
const getMarkTypes = tags => {
  const seen = new Set();
  const marks = [];
  for (const tag of tags) {
    const rule = domParser.rules.find(candidate => candidate.tag === tag && candidate.mark);
    const markType = rule == null ? undefined : rule.mark;
    if (markType && !seen.has(markType)) {
      marks.push({ type: markType });
      seen.add(markType);
    }
  }
  return marks;
};

/** Creates a paragraph node wrapping the given inline content (empty by default). */
const createParagraph = (content = []) => ({
  type: 'paragraph',
  content,
});

/**
 * Resolves the node type for a tag, falling back to 'paragraph'.
 *
 * The lookup only considers rules that produce a node, so it agrees with
 * `isBlockElement` even if a mark rule for the same tag comes first.
 */
const getBlockType = tag => {
  const rule = domParser.rules.find(candidate => candidate.tag === tag && candidate.node);
  return rule ? rule.node : 'paragraph';
};

/** Creates a text node; the `marks` key is present only when at least one mark resolves. */
const createTextNode = (text, marks) => {
  const markTypes = getMarkTypes(marks);
  return {
    type: 'text',
    text,
    ...(markTypes.length > 0 && { marks: markTypes }),
  };
};

/** Appends a text node to `content` unless `text` is empty. */
const addText = (text, marks, content) => {
  if (text) {
    content.push(createTextNode(text, marks));
  }
};

/** Moves any accumulated inline content into `target` as one paragraph and clears the accumulator. */
const flushParagraph = (currentParagraphContent, target) => {
  if (currentParagraphContent.length > 0) {
    target.push(createParagraph([...currentParagraphContent]));
    currentParagraphContent.length = 0;
  }
};

/**
 * Handles an element whose tag maps to a node type.
 *
 * @param {string} tag - Lower-cased tag name.
 * @param {string} innerContent - Raw HTML between the opening and closing tag.
 * @param {string[]} marks - Tags of enclosing elements that were treated as marks.
 * @param {object[]} blocks - Top-level document content being built (mutated).
 * @param {object[]} currentParagraphContent - Shared inline accumulator (mutated).
 * @param {Function} parseNode - Recursive parser `(content, marks, nestedContainer)`.
 * @param {{parentType: string, children: object[]}} [nestedContainer] - Enclosing container, if any.
 */
const handleBlockElement = (tag, innerContent, marks, blocks, currentParagraphContent, parseNode, nestedContainer) => {
  const parentNodeType = nestedContainer == null ? undefined : nestedContainer.parentType;

  // Text seen before this element belongs to the enclosing container when that
  // container may hold paragraphs; otherwise it goes to the top-level blocks.
  // Flushing to `blocks` unconditionally would move it out of, and ahead of,
  // the container that is still being built.
  const pendingTarget =
    nestedContainer && parentNodeType && isAllowedChild(parentNodeType, 'paragraph')
      ? nestedContainer.children
      : blocks;
  flushParagraph(currentParagraphContent, pendingTarget);

  const blockType = getBlockType(tag);
  const newMarks = shouldApplyMark(tag) ? [...marks, tag] : marks;

  if (nestedContainer && parentNodeType && isAllowedChild(parentNodeType, blockType)) {
    if (canContainChildren(blockType)) {
      // Container inside a container (e.g. a list item inside a list).
      const elementContent = [];
      parseNode(innerContent, newMarks, { parentType: blockType, children: elementContent });
      flushParagraph(currentParagraphContent, elementContent);
      nestedContainer.children.push({
        type: blockType,
        content: elementContent.length > 0 ? elementContent : [createParagraph()],
      });
    } else if (isTextBlock(blockType)) {
      // Paragraph / code block inside a container: inline content goes in
      // directly. Wrapping it in another paragraph would nest a paragraph
      // inside a paragraph.
      parseNode(innerContent, newMarks, nestedContainer);
      nestedContainer.children.push({
        type: blockType,
        content: [...currentParagraphContent],
      });
      currentParagraphContent.length = 0;
    } else {
      // NOTE(review): other leaf block types keep the pre-existing behaviour of
      // receiving paragraph-wrapped content — confirm this is valid for them.
      const elementContent = [];
      parseNode(innerContent, newMarks, nestedContainer);
      flushParagraph(currentParagraphContent, elementContent);
      nestedContainer.children.push({
        type: blockType,
        content: elementContent.length > 0 ? elementContent : [createParagraph()],
      });
    }
  } else if (canContainChildren(blockType)) {
    // Container that starts a new nesting context and is emitted at the top level.
    const children = [];
    parseNode(innerContent, newMarks, { parentType: blockType, children });
    blocks.push({
      type: blockType,
      content: children.length > 0 ? children : [createParagraph()],
    });
  } else {
    // Regular block elements
    parseNode(innerContent, newMarks, nestedContainer);
    if (currentParagraphContent.length > 0) {
      if (isTextBlock(blockType)) {
        blocks.push({
          type: blockType,
          content: [...currentParagraphContent],
        });
      }
      // NOTE(review): inline content of any other block type is discarded
      // here — confirm that dropping it is intended.
      currentParagraphContent.length = 0;
    }
  }
};

/** Handles an element that is not a block: extends the mark stack if applicable and recurses. */
const handleInlineElement = (tag, innerContent, marks, parseNode, nestedContainer) => {
  const newMarks = shouldApplyMark(tag) ? [...marks, tag] : marks;
  parseNode(innerContent, newMarks, nestedContainer);
};

/**
 * Simple SSR-compatible parser that recognises text wrapped in HTML elements
 * and extracts their content as ADF.
 *
 * Designed specifically for parsing i18n strings for ADF, which need to be
 * HTML strings for translation.
 *
 * Supports nested structures automatically derived from the ADF schema:
 * - Lists: ul/ol → li (listItem)
 * - Tables: table → tr (tableRow) → td/th (tableCell/tableHeader)
 * - Paragraphs, code blocks, and text marks
 * - Any other nested structures defined in the schema
 *
 * Limitations of the regex approach:
 * - An element ends at the FIRST closing tag with the same name, so an element
 *   nested inside another element of the same tag is not parsed correctly.
 * - Only elements with an explicit closing tag are recognised.
 * - Element attributes are ignored.
 *
 * @param html - The HTML string to parse
 * @returns ADF DocNode containing the parsed content
 */
export const parseHTMLTextContent = html => {
  const blocks = [];
  const currentParagraphContent = [];

  // Simple regex-based parser that works in both SSR and browser
  const parseNode = (content, marks = [], nestedContainer) => {
    // Match HTML tags and text content.
    // The regex is created per call on purpose: it is stateful (`g` flag) and
    // parseNode recurses, so a shared instance would corrupt `lastIndex`.
    // The `i` flag lets the closing tag differ in case from the opening tag.
    // eslint-disable-next-line require-unicode-regexp
    const tagRegex = /<\s*(\w+)([^>]*)>([\s\S]*?)<\s*\/\s*\1\s*>|([^<]+)/gi;
    let match = tagRegex.exec(content);
    while (match !== null) {
      if (match[4]) {
        // Plain text run
        addText(match[4], marks, currentParagraphContent);
      } else {
        // HTML element
        const tag = match[1].toLowerCase();
        const innerContent = match[3];
        if (isBlockElement(tag)) {
          handleBlockElement(tag, innerContent, marks, blocks, currentParagraphContent, parseNode, nestedContainer);
        } else {
          handleInlineElement(tag, innerContent, marks, parseNode, nestedContainer);
        }
      }
      match = tagRegex.exec(content);
    }
  };

  parseNode(html);

  // Push any remaining content
  if (currentParagraphContent.length > 0) {
    blocks.push(createParagraph(currentParagraphContent));
  }

  return {
    type: 'doc',
    version: 1,
    content: blocks.length > 0 ? blocks : [createParagraph()],
  };
};