UNPKG

@awayjs/scene

Version:
427 lines (426 loc) 19.7 kB
import { parse } from 'node-html-parser';

/**
 * Entity replacements applied to the text of each content node after parsing
 * (see readHTMLTextPropertiesRecursive).
 */
var MNEMOS = [
    { test: /&gt;/g, replace: '>' }
];

/**
 * Ordered [pattern, replacement] pairs applied to the raw html before parsing.
 * Order matters: `&amp;` is decoded first and a bare "& " is re-encoded last.
 *
 * NOTE(review): as written, the two patterns for the square-root and
 * multiplication signs replace each character with itself (char codes 8730 and
 * 215), so they are no-ops. Presumably they once targeted an entity or a
 * mis-encoded form - confirm against the upstream source.
 */
var PRE_PARSE_REPLACEMENTS = [
    [/&quot;/g, '"'],
    [/&amp;/g, '&'],
    [/&apos;/g, '\''],
    [/&nbsp;/g, ' '],
    [/√/g, String.fromCharCode(8730)],
    [/×/g, String.fromCharCode(215)],
    // "<\" is treated as a mistyped "</"
    [/<\\/g, '</'],
    [/<br>/g, '<br/>'],
    [/<BR>/g, '<br/>'],
    [/<BR\/>/g, '<br/>'],
    // lower-case the tag names that the tag balancer and the reader look for
    [/<B/g, '<b'],
    [/<\/B>/g, '</b>'],
    [/<I/g, '<i'],
    [/<\/I>/g, '</i>'],
    [/<P/g, '<p'],
    [/<\/P>/g, '</p>'],
    [/<U/g, '<u'],
    [/<\/U>/g, '</u>'],
    [/<LI/g, '<li'],
    [/<\/LI>/g, '</li>'],
    [/<FONT/g, '<font'],
    [/<\/FONT>/g, '</font>'],
    [/& /g, '&amp; ']
];

/** Strips everything except digits and dots, then parses a base-10 integer. */
function digitsToInt(value) {
    return parseInt(value.replace(/[^0-9.]/g, ''), 10);
}

/**
 * Converts a colour attribute such as "#ff0000" into a number.
 * The radix is intentionally omitted: the "0x" prefix produced by the
 * replacement makes parseInt read hex, while a value without "#" is still
 * read as decimal.
 */
function colorToInt(value) {
    return parseInt(value.replace('#', '0x'));
}

/** Returns the attribute value unchanged. */
function asIs(value) {
    return value;
}

/**
 * Attributes that map onto text-format properties, in the order in which they
 * are collected. Each attribute is looked up by `attr` first, then by `upper`.
 */
var ATTRIBUTE_FORMAT_PROPS = [
    { attr: 'size', upper: 'SIZE', prop: 'size', read: digitsToInt },
    { attr: 'color', upper: 'COLOR', prop: 'color', read: colorToInt },
    { attr: 'indent', upper: 'INDENT', prop: 'indent', read: digitsToInt },
    { attr: 'leftMargin', upper: 'LEFTMARGIN', prop: 'leftMargin', read: digitsToInt },
    { attr: 'rightMargin', upper: 'RIGHTMARGIN', prop: 'rightMargin', read: digitsToInt },
    { attr: 'align', upper: 'ALIGN', prop: 'align', read: asIs },
    { attr: 'face', upper: 'FACE', prop: 'font_name', read: asIs }
];

/** True when `text` contains `prefix` starting exactly at index `pos`. */
function hasPrefixAt(text, pos, prefix) {
    return text.slice(pos, pos + prefix.length) === prefix;
}

/**
 * Identifies a tracked opening tag at `pos` (the index of a '<').
 *
 * NOTE(review): matching is by prefix only, so e.g. "<ul" is tracked as 'u'
 * and "<pre" as 'p'. This mirrors the long-standing behaviour.
 *
 * @returns {?string} 'p', 'b', 'i', 'u', 'font', 'li', or null.
 */
function readOpenTag(input, pos) {
    switch (input[pos + 1]) {
        case 'p':
        case 'i':
        case 'u':
            return input[pos + 1];
        case 'b':
            // "<br" is a line break, not a bold tag
            return input[pos + 2] !== 'r' ? 'b' : null;
        case 'f':
            return hasPrefixAt(input, pos + 1, 'font') ? 'font' : null;
        case 'l':
            return input[pos + 2] === 'i' ? 'li' : null;
        default:
            return null;
    }
}

/**
 * Identifies a tracked closing tag at `pos` (the index of a '<').
 * Matching is by prefix only, like readOpenTag.
 *
 * @returns {?string} 'p', 'b', 'i', 'u', 'font', 'li', or null.
 */
function readCloseTag(input, pos) {
    if (input[pos + 1] !== '/') {
        return null;
    }
    switch (input[pos + 2]) {
        case 'p':
        case 'b':
        case 'i':
        case 'u':
            return input[pos + 2];
        case 'l':
            return input[pos + 3] === 'i' ? 'li' : null;
        case 'f':
            return hasPrefixAt(input, pos + 2, 'font') ? 'font' : null;
        default:
            return null;
    }
}

/**
 * Records the edits needed for a closing tag found at `pos`.
 * - No matching open tag: the stray closing tag is scheduled for removal.
 * - Otherwise: every tag opened after the match gets an explicit closing tag
 *   inserted in front of `pos`, and the match itself is popped.
 */
function recordClose(tag, pos, openTags, edits) {
    var matchIdx = openTags.lastIndexOf(tag);
    if (matchIdx < 0) {
        // Remove exactly "</" + tag + ">". A fixed length per branch is
        // error-prone: removing 7 characters for "</li>" (5 characters long)
        // would also swallow the two characters following it.
        edits.push({ at: pos, remove: tag.length + 3 });
        return;
    }
    while (openTags.length - 1 > matchIdx) {
        edits.push({ at: pos, text: '</' + openTags.pop() + '>' });
    }
    openTags.pop();
}

/**
 * Makes sure the tracked tags (p, b, i, u, font, li) are balanced: unclosed
 * tags are closed, and closing tags without an opening tag are dropped.
 *
 * @param {string} input html after the pre-parse replacements.
 * @returns {string} html with the edits applied.
 */
function balanceTags(input) {
    var openTags = [];
    var edits = [];
    for (var pos = 0; pos < input.length; pos++) {
        if (input[pos] !== '<') {
            continue;
        }
        var opened = readOpenTag(input, pos);
        if (opened) {
            openTags.push(opened);
            continue;
        }
        var closed = readCloseTag(input, pos);
        if (closed) {
            recordClose(closed, pos, openTags, edits);
        }
    }
    // whatever is still open gets closed at the very end, innermost first
    while (openTags.length > 0) {
        edits.push({ at: input.length, text: '</' + openTags.pop() + '>' });
    }
    // Edit positions refer to the unmodified input; `shift` tracks how far
    // the edits applied so far have moved the following positions.
    var shift = 0;
    for (var e = 0; e < edits.length; e++) {
        var edit = edits[e];
        var at = edit.at + shift;
        if (edit.text === undefined) {
            input = input.slice(0, at) + input.slice(at + edit.remove);
            shift -= edit.remove;
        }
        else {
            input = input.slice(0, at) + edit.text + input.slice(at);
            shift += edit.text.length;
        }
    }
    return input;
}

/**
 * Removes one trailing CR or LF character, then up to two trailing literal
 * backslash + "n" sequences.
 */
function trimTrailingBreaks(text) {
    var lastCode = text.charCodeAt(text.length - 1);
    if (lastCode === 13 || lastCode === 10) {
        text = text.slice(0, text.length - 1);
    }
    for (var pass = 0; pass < 2; pass++) {
        if (text.slice(-2) === '\\n') {
            text = text.slice(0, text.length - 2);
        }
    }
    return text;
}

/** True when the parsed node reports `name` as its tagName or rawTagName. */
function isTag(node, name) {
    return node.tagName === name || node.rawTagName === name;
}

/**
 * Converts html-flavoured text into plain text plus a list of text formats
 * stored on the target object. Use HTMLTextProcessor.get() for the shared
 * instance.
 */
var HTMLTextProcessor = /** @class */ (function () {
    function HTMLTextProcessor() {
    }

    /**
     * Returns the shared instance, creating it on first use.
     * @returns {HTMLTextProcessor}
     */
    HTMLTextProcessor.get = function () {
        if (!HTMLTextProcessor.instance)
            HTMLTextProcessor.instance = new HTMLTextProcessor();
        return HTMLTextProcessor.instance;
    };

    /**
     * Parses `input` and returns its plain text.
     *
     * Side effects on `target_tf`: `_textFormats` is reset to
     * `[target_tf.newTextFormat]` and `_textFormatsIdx` to `[0]`, both are
     * extended while the document is walked, and the last `_textFormatsIdx`
     * entry is finally set to the length of the returned text.
     *
     * @param {Object} target_tf object exposing `newTextFormat`, `multiline`,
     *     `_textFormats` and `_textFormatsIdx` (presumably a TextField -
     *     confirm against callers).
     * @param {string} input html text.
     * @returns {string} the extracted plain text.
     */
    HTMLTextProcessor.prototype.processHTML = function (target_tf, input) {
        for (var r = 0; r < PRE_PARSE_REPLACEMENTS.length; r++) {
            input = input.replace(PRE_PARSE_REPLACEMENTS[r][0], PRE_PARSE_REPLACEMENTS[r][1]);
        }
        // The original comments state that FP auto-closes tags to some
        // degree, so the markup is balanced before it reaches the parser.
        input = balanceTags(input);

        var textProps = { text: '' };
        var childFormat = target_tf.newTextFormat;
        target_tf._textFormats = [childFormat];
        target_tf._textFormatsIdx = [0];

        var doc = parse(input);
        if (doc && doc.firstChild) {
            textProps.multiline = doc.firstChild.childNodes.length > 0;
            this.readHTMLTextPropertiesRecursive(target_tf, doc, textProps, childFormat);
        }

        textProps.text = trimTrailingBreaks(textProps.text);
        target_tf._textFormatsIdx[target_tf._textFormatsIdx.length - 1] = textProps.text.length;
        return textProps.text;
    };

    /**
     * Walks one parsed node: derives format changes from its attributes and
     * tag, registers a cloned format on `target_tf` when anything differs,
     * then either recurses into the children or appends the node's text.
     *
     * @param {Object} target_tf receives `_textFormats` / `_textFormatsIdx`
     *     updates; `multiline` decides whether `br` / `sbr` add a line break.
     * @param {Object} myChild node produced by the html parser.
     * @param {{text: string}} textProps accumulator for the plain text.
     * @param {Object} currentFormat format in effect for the parent node;
     *     must provide `clone()` when a property changes.
     */
    HTMLTextProcessor.prototype.readHTMLTextPropertiesRecursive = function (target_tf, myChild, textProps, currentFormat) {
        var newProps_values = [];
        var newProps_names = [];

        // step 1 : collect the textformat-properties provided by the attributes
        var attrs = myChild.attributes;
        if (attrs) {
            for (var a = 0; a < ATTRIBUTE_FORMAT_PROPS.length; a++) {
                var spec = ATTRIBUTE_FORMAT_PROPS[a];
                var raw = attrs[spec.attr] || attrs[spec.upper];
                if (raw) {
                    newProps_values.push(spec.read(raw));
                    newProps_names.push(spec.prop);
                }
            }
        }

        // step 2 : collect the textformat-properties / text provided by the tag
        if (isTag(myChild, 'b')) {
            if (!currentFormat.bold) {
                newProps_values.push(true);
                newProps_names.push('bold');
            }
        }
        else if (isTag(myChild, 'i')) {
            if (!currentFormat.italic) {
                newProps_values.push(true);
                newProps_names.push('italic');
            }
        }
        else if (isTag(myChild, 'u')) {
            if (!currentFormat.underline) {
                newProps_values.push(true);
                newProps_names.push('underline');
            }
        }
        else if (isTag(myChild, 'font')) {
            // nothing to do: font properties arrive through the attributes
        }
        else if (isTag(myChild, 'li')) {
            textProps.text += '\n ● ';
        }
        else if ((isTag(myChild, 'br') || isTag(myChild, 'sbr')) && target_tf.multiline) {
            textProps.text += '\n';
        }

        // step 3 : compare the new properties against the current format and
        // decide whether a new (merged) format is needed. The loose `!=` is
        // kept on purpose so existing comparisons behave as before.
        var cloneFormat = false;
        var i;
        for (i = newProps_values.length - 1; i >= 0; i--) {
            if (currentFormat[newProps_names[i]] != newProps_values[i]) {
                cloneFormat = true;
                break;
            }
        }
        var childFormat = currentFormat;
        if (cloneFormat) {
            childFormat = currentFormat.clone();
            for (i = newProps_values.length - 1; i >= 0; i--) {
                childFormat[newProps_names[i]] = newProps_values[i];
            }
            target_tf._textFormats.push(childFormat);
            target_tf._textFormatsIdx[target_tf._textFormatsIdx.length - 1] = textProps.text.length;
            target_tf._textFormatsIdx.push(textProps.text.length);
        }

        if (myChild.childNodes && myChild.childNodes.length > 0) {
            // container node: traverse the children, add no text of its own
            for (var k = 0; k < myChild.childNodes.length; k++) {
                // a previous sibling may have registered its own format;
                // switch back to this node's format before the next child
                if (target_tf._textFormats[target_tf._textFormats.length - 1] != childFormat) {
                    target_tf._textFormats.push(childFormat);
                    target_tf._textFormatsIdx[target_tf._textFormatsIdx.length - 1] = textProps.text.length;
                    target_tf._textFormatsIdx.push(textProps.text.length);
                }
                this.readHTMLTextPropertiesRecursive(target_tf, myChild.childNodes[k], textProps, childFormat);
            }
        }
        else {
            // content node: append its text unless it is whitespace only
            var testContent = myChild.text.replace(/[\s\r\n]/gi, '');
            if (testContent != '') {
                var newText = myChild.text;
                for (var m = 0; m < MNEMOS.length; m++) {
                    newText = newText.replace(MNEMOS[m].test, MNEMOS[m].replace);
                }
                textProps.text += newText;
            }
        }

        // a paragraph ends with a line break; ul / li add nothing here
        if (isTag(myChild, 'p')) {
            textProps.text += '\n';
        }
    };

    return HTMLTextProcessor;
}());

export { HTMLTextProcessor };