UNPKG

d2-ui

Version:
420 lines (358 loc) 12.1 kB
/** * Copyright (c) 2013-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. An additional grant * of patent rights can be found in the PATENTS file in the same directory. * * @providesModule convertFromHTMLToContentBlocks * @typechecks * */ 'use strict'; var CharacterMetadata = require('./CharacterMetadata'); var ContentBlock = require('./ContentBlock'); var DefaultDraftBlockRenderMap = require('./DefaultDraftBlockRenderMap'); var DraftEntity = require('./DraftEntity'); var Immutable = require('immutable'); var URI = require('fbjs/lib/URI'); var generateRandomKey = require('./generateRandomKey'); var getSafeBodyFromHTML = require('./getSafeBodyFromHTML'); var invariant = require('fbjs/lib/invariant'); var nullthrows = require('fbjs/lib/nullthrows'); var sanitizeDraftText = require('./sanitizeDraftText'); var List = Immutable.List; var OrderedSet = Immutable.OrderedSet; var NBSP = '&nbsp;'; var SPACE = ' '; // Arbitrary max indent var MAX_DEPTH = 4; // used for replacing characters in HTML var REGEX_CR = new RegExp('\r', 'g'); var REGEX_LF = new RegExp('\n', 'g'); var REGEX_NBSP = new RegExp(NBSP, 'g'); // Block tag flow is different because LIs do not have // a deterministic style ;_; var inlineTags = { b: 'BOLD', code: 'CODE', del: 'STRIKETHROUGH', em: 'ITALIC', i: 'ITALIC', s: 'STRIKETHROUGH', strike: 'STRIKETHROUGH', strong: 'BOLD', u: 'UNDERLINE' }; var lastBlock; function getEmptyChunk() { return { text: '', inlines: [], entities: [], blocks: [] }; } function getWhitespaceChunk(inEntity) { var entities = new Array(1); if (inEntity) { entities[0] = inEntity; } return { text: SPACE, inlines: [OrderedSet()], entities: entities, blocks: [] }; } function getSoftNewlineChunk() { return { text: '\n', inlines: [OrderedSet()], entities: new Array(1), blocks: [] }; } function getBlockDividerChunk(block, depth) { return { text: '\r', inlines: [OrderedSet()], entities: new Array(1), blocks: [{ type: block, depth: Math.max(0, Math.min(MAX_DEPTH, depth)) }] }; } function getListBlockType(tag, lastList) { if (tag === 'li') { return lastList === 'ol' ? 'ordered-list-item' : 'unordered-list-item'; } return null; } function getBlockMapSupportedTags(blockRenderMap) { var unstyledElement = blockRenderMap.get('unstyled').element; return blockRenderMap.map(function (config) { return config.element; }).valueSeq().toSet().filter(function (tag) { return tag !== unstyledElement; }).toArray().sort(); } // custom element conversions function getMultiMatchedType(tag, lastList, multiMatchExtractor) { for (var ii = 0; ii < multiMatchExtractor.length; ii++) { var matchType = multiMatchExtractor[ii](tag, lastList); if (matchType) { return matchType; } } return null; } function getBlockTypeForTag(tag, lastList, blockRenderMap) { var matchedTypes = blockRenderMap.filter(function (config) { return config.element === tag || config.wrapper === tag; }).keySeq().toSet().toArray().sort(); // if we dont have any matched type, return unstyled // if we have one matched type return it // if we have multi matched types use the multi-match function to gather type switch (matchedTypes.length) { case 0: return 'unstyled'; case 1: return matchedTypes[0]; default: return getMultiMatchedType(tag, lastList, [getListBlockType]) || 'unstyled'; } } function processInlineTag(tag, node, currentStyle) { var styleToCheck = inlineTags[tag]; if (styleToCheck) { currentStyle = currentStyle.add(styleToCheck).toOrderedSet(); } else if (node instanceof HTMLElement) { (function () { var htmlElement = node; currentStyle = currentStyle.withMutations(function (style) { if (htmlElement.style.fontWeight === 'bold') { style.add('BOLD'); } if (htmlElement.style.fontStyle === 'italic') { style.add('ITALIC'); } if (htmlElement.style.textDecoration === 'underline') { style.add('UNDERLINE'); } if (htmlElement.style.textDecoration === 'line-through') { style.add('STRIKETHROUGH'); } }).toOrderedSet(); })(); } return currentStyle; } function joinChunks(A, B) { // Sometimes two blocks will touch in the DOM and we need to strip the // extra delimiter to preserve niceness. var lastInB = B.text.slice(0, 1); if (A.text.slice(-1) === '\r' && lastInB === '\r') { A.text = A.text.slice(0, -1); A.inlines.pop(); A.entities.pop(); A.blocks.pop(); } // Kill whitespace after blocks if (A.text.slice(-1) === '\r') { if (B.text === SPACE || B.text === '\n') { return A; } else if (lastInB === SPACE || lastInB === '\n') { B.text = B.text.slice(1); B.inlines.shift(); B.entities.shift(); } } return { text: A.text + B.text, inlines: A.inlines.concat(B.inlines), entities: A.entities.concat(B.entities), blocks: A.blocks.concat(B.blocks) }; } /** * Check to see if we have anything like <p> <blockquote> <h1>... to create * block tags from. If we do, we can use those and ignore <div> tags. If we * don't, we can treat <div> tags as meaningful (unstyled) blocks. */ function containsSemanticBlockMarkup(html, blockTags) { return blockTags.some(function (tag) { return html.indexOf('<' + tag) !== -1; }); } function hasValidLinkText(link) { !(link instanceof HTMLAnchorElement) ? process.env.NODE_ENV !== 'production' ? invariant(false, 'Link must be an HTMLAnchorElement.') : invariant(false) : undefined; var protocol = link.protocol; return protocol === 'http:' || protocol === 'https:'; } function genFragment(node, inlineStyle, lastList, inBlock, blockTags, depth, blockRenderMap, inEntity) { var nodeName = node.nodeName.toLowerCase(); var newBlock = false; var nextBlockType = 'unstyled'; var lastLastBlock = lastBlock; // Base Case if (nodeName === '#text') { var text = node.textContent; if (text.trim() === '' && inBlock !== 'pre') { return getWhitespaceChunk(inEntity); } if (inBlock !== 'pre') { // Can't use empty string because MSWord text = text.replace(REGEX_LF, SPACE); } // save the last block so we can use it later lastBlock = nodeName; return { text: text, inlines: Array(text.length).fill(inlineStyle), entities: Array(text.length).fill(inEntity), blocks: [] }; } // save the last block so we can use it later lastBlock = nodeName; // BR tags if (nodeName === 'br') { if (lastLastBlock === 'br' && (!inBlock || getBlockTypeForTag(inBlock, lastList, blockRenderMap) === 'unstyled')) { return getBlockDividerChunk('unstyled', depth); } return getSoftNewlineChunk(); } var chunk = getEmptyChunk(); var newChunk = null; // Inline tags inlineStyle = processInlineTag(nodeName, node, inlineStyle); // Handle lists if (nodeName === 'ul' || nodeName === 'ol') { if (lastList) { depth += 1; } lastList = nodeName; } // Block Tags if (!inBlock && blockTags.indexOf(nodeName) !== -1) { chunk = getBlockDividerChunk(getBlockTypeForTag(nodeName, lastList, blockRenderMap), depth); inBlock = nodeName; newBlock = true; } else if (lastList && inBlock === 'li' && nodeName === 'li') { chunk = getBlockDividerChunk(getBlockTypeForTag(nodeName, lastList, blockRenderMap), depth); inBlock = nodeName; newBlock = true; nextBlockType = lastList === 'ul' ? 'unordered-list-item' : 'ordered-list-item'; } // Recurse through children var child = node.firstChild; if (child != null) { nodeName = child.nodeName.toLowerCase(); } var entityId = null; var href = null; while (child) { if (nodeName === 'a' && child.href && hasValidLinkText(child)) { href = new URI(child.href).toString(); entityId = DraftEntity.create('LINK', 'MUTABLE', { url: href }); } else { entityId = undefined; } newChunk = genFragment(child, inlineStyle, lastList, inBlock, blockTags, depth, blockRenderMap, entityId || inEntity); chunk = joinChunks(chunk, newChunk); var sibling = child.nextSibling; // Put in a newline to break up blocks inside blocks if (sibling && blockTags.indexOf(nodeName) >= 0 && inBlock) { chunk = joinChunks(chunk, getSoftNewlineChunk()); } if (sibling) { nodeName = sibling.nodeName.toLowerCase(); } child = sibling; } if (newBlock) { chunk = joinChunks(chunk, getBlockDividerChunk(nextBlockType, depth)); } return chunk; } function getChunkForHTML(html, DOMBuilder, blockRenderMap) { html = html.trim().replace(REGEX_CR, '').replace(REGEX_NBSP, SPACE); var supportedBlockTags = getBlockMapSupportedTags(blockRenderMap); var safeBody = DOMBuilder(html); if (!safeBody) { return null; } lastBlock = null; // Sometimes we aren't dealing with content that contains nice semantic // tags. In this case, use divs to separate everything out into paragraphs // and hope for the best. var workingBlocks = containsSemanticBlockMarkup(html, supportedBlockTags) ? supportedBlockTags : ['div']; // Start with -1 block depth to offset the fact that we are passing in a fake // UL block to start with. var chunk = genFragment(safeBody, OrderedSet(), 'ul', null, workingBlocks, -1, blockRenderMap); // join with previous block to prevent weirdness on paste if (chunk.text.indexOf('\r') === 0) { chunk = { text: chunk.text.slice(1), inlines: chunk.inlines.slice(1), entities: chunk.entities.slice(1), blocks: chunk.blocks }; } // Kill block delimiter at the end if (chunk.text.slice(-1) === '\r') { chunk.text = chunk.text.slice(0, -1); chunk.inlines = chunk.inlines.slice(0, -1); chunk.entities = chunk.entities.slice(0, -1); chunk.blocks.pop(); } // If we saw no block tags, put an unstyled one in if (chunk.blocks.length === 0) { chunk.blocks.push({ type: 'unstyled', depth: 0 }); } // Sometimes we start with text that isn't in a block, which is then // followed by blocks. Need to fix up the blocks to add in // an unstyled block for this content if (chunk.text.split('\r').length === chunk.blocks.length + 1) { chunk.blocks.unshift({ type: 'unstyled', depth: 0 }); } return chunk; } function convertFromHTMLtoContentBlocks(html) { var DOMBuilder = arguments.length <= 1 || arguments[1] === undefined ? getSafeBodyFromHTML : arguments[1]; var blockRenderMap = arguments.length <= 2 || arguments[2] === undefined ? DefaultDraftBlockRenderMap : arguments[2]; // Be ABSOLUTELY SURE that the dom builder you pass hare won't execute // arbitrary code in whatever environment you're running this in. For an // example of how we try to do this in-browser, see getSafeBodyFromHTML. var chunk = getChunkForHTML(html, DOMBuilder, blockRenderMap); if (chunk == null) { return null; } var start = 0; return chunk.text.split('\r').map(function (textBlock, ii) { // Make absolutely certain that our text is acceptable. textBlock = sanitizeDraftText(textBlock); var end = start + textBlock.length; var inlines = nullthrows(chunk).inlines.slice(start, end); var entities = nullthrows(chunk).entities.slice(start, end); var characterList = List(inlines.map(function (style, ii) { var data = { style: style, entity: null }; if (entities[ii]) { data.entity = entities[ii]; } return CharacterMetadata.create(data); })); start = end + 1; return new ContentBlock({ key: generateRandomKey(), type: nullthrows(chunk).blocks[ii].type, depth: nullthrows(chunk).blocks[ii].depth, text: textBlock, characterList: characterList }); }); } module.exports = convertFromHTMLtoContentBlocks;