UNPKG

rtf-stream-parser

Version:

Stream Transform class to tokenize RTF, and another to de-encapsulate text or HTML

119 lines (118 loc) 4.86 kB
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.DeEncapsulate = void 0;

const checkVersion_1 = require("./features/checkVersion");
const countTokens_1 = require("./features/countTokens");
const handleCharacterSet_1 = require("./features/handleCharacterSet");
const handleControlsAndDestinations_1 = require("./features/handleControlsAndDestinations");
const handleDeEncapsulation_1 = require("./features/handleDeEncapsulation");
const handleFonts_1 = require("./features/handleFonts");
const handleGroupState_1 = require("./features/handleGroupState");
const handleOutput_1 = require("./features/handleOutput");
const handleUnicodeSkip_1 = require("./features/handleUnicodeSkip");
const textEscapes_1 = require("./features/textEscapes");
const ProcessTokens_1 = require("./ProcessTokens");

/**
 * Option defaults specific to de-encapsulation. They are layered on top of
 * `procTokensDefaultOptions` and below the caller-supplied options when the
 * class builds its effective option set.
 */
const deEncExtraDefaultOptions = {
    htmlEncodeNonAscii: false,
    htmlFixContentType: false,
    htmlPreserveSpaces: false,
    mode: 'either',
    prefix: false,
    outlookQuirksMode: false,
};

/**
 * Replaces every code point above 0x7F with an HTML entity: U+00A0 becomes
 * `&nbsp;`, anything else becomes a lowercase hexadecimal numeric reference.
 * Code points up to 0x7F are passed through untouched.
 *
 * @param {string} str - Text to encode.
 * @returns {string} The original string when nothing needed encoding,
 *   otherwise the re-assembled encoded string.
 */
function htmlEntityEncode(str) {
    let sawNonAscii = false;
    // Array.from walks the string by code point, so surrogate pairs are
    // handed to the mapper as a single character.
    const encoded = Array.from(str, (char) => {
        const codepoint = char.codePointAt(0);
        if (codepoint <= 0x7F) {
            return char;
        }
        sawNonAscii = true;
        return codepoint === 0xA0
            ? '&nbsp;'
            : '&#x' + codepoint.toString(16) + ';';
    });
    return sawNonAscii ? encoded.join('') : str;
}

// Captures: (1) the "charset=" prefix, (2) the charset name, (3) the closing
// double quote. Only the first occurrence in a string is matched.
const rxCharset = /(\bcharset=)([\w-]+)(")/i;

/**
 * Token processor that post-processes the string output of `ProcessTokens`
 * for de-encapsulation: optional charset rewriting inside `htmltag`
 * destinations, and `<` / `>` escaping, space preservation and non-ASCII
 * entity encoding outside of them.
 */
class DeEncapsulate extends ProcessTokens_1.ProcessTokens {
    /**
     * @param {object} [options] - Overrides merged over
     *   `procTokensDefaultOptions` and `deEncExtraDefaultOptions`.
     */
    constructor(options) {
        super(options);

        // Handlers are listed in the order the original module declares them.
        // NOTE(review): presumably the base class runs them in this order per
        // token -- confirm against ProcessTokens before reordering.
        this._featureHandlers = [
            countTokens_1.countTokens,
            checkVersion_1.checkVersion,
            handleGroupState_1.handleGroupState,
            handleUnicodeSkip_1.handleUnicodeSkip,
            handleControlsAndDestinations_1.handleControlsAndDestinations,
            handleCharacterSet_1.handleCharacterSet,
            handleFonts_1.handleFonts,
            handleDeEncapsulation_1.handleDeEncapsulation,
            handleOutput_1.handleOutput,
            textEscapes_1.handleTextEscapes,
        ];

        this._rootState = {
            uc: 1,
            groupDepth: 0,
            destDepth: 0,
            destGroupDepth: 0,
        };
        this._state = this._rootState;

        this._fromhtml = false;
        this._fromtext = false;
        this._didHtmlCharsetReplace = false;

        // Later sources win: base defaults < de-encapsulation defaults < caller.
        this._options = Object.assign(
            {},
            ProcessTokens_1.procTokensDefaultOptions,
            deEncExtraDefaultOptions,
            options
        );
    }

    /** @returns {boolean} Current value of the `_fromhtml` flag. */
    get isHtml() {
        return this._fromhtml;
    }

    /** @returns {boolean} Current value of the `_fromtext` flag. */
    get isText() {
        return this._fromtext;
    }

    /**
     * @returns {string|undefined} The charset name that was replaced by
     *   `UTF-8`, or `undefined` if no replacement has happened.
     */
    get originalHtmlCharset() {
        return this._originalHtmlCharset;
    }

    /**
     * Decodes output via the base class and, when `_fromhtml` is set, applies
     * the HTML-specific transformations selected by the options.
     *
     * @param {*} data - Passed through to the base implementation.
     * @param {*} font - Passed through to the base implementation.
     * @returns {Array} `[outStr, areSymbolFontCodepoints]`, where the second
     *   element is returned exactly as the base implementation produced it.
     */
    _getOutputAsString(data, font) {
        const [decoded, areSymbolFontCodepoints] = super._getOutputAsString(data, font);

        if (!this._fromhtml) {
            return [decoded, areSymbolFontCodepoints];
        }

        const destinations = this._state.allDestinations;
        const insideHtmltag = destinations != null && Boolean(destinations['htmltag']);

        if (insideHtmltag) {
            // Inside an htmltag destination the text is left as-is, apart from
            // a single charset rewrite when that option is enabled.
            if (!this._options.htmlFixContentType || this._didHtmlCharsetReplace) {
                return [decoded, areSymbolFontCodepoints];
            }
            const rewritten = decoded.replace(rxCharset, (match, pre, charset, post) => {
                this._didHtmlCharsetReplace = true;
                this._originalHtmlCharset = charset;
                return `${pre}UTF-8${post}`;
            });
            return [rewritten, areSymbolFontCodepoints];
        }

        // Outside htmltag: escape angle brackets in the text.
        let text = decoded.replace(/<|>/g, (match) => (match === '<' ? '&lt;' : '&gt;'));

        if (this._options.htmlPreserveSpaces) {
            const nbspRun = (count) => '\u00A0'.repeat(count);
            if (text === ' ') {
                text = '\u00A0';
            } else {
                // Keep the first space of each run and turn the rest into
                // non-breaking spaces, then convert any plain space still
                // sitting at the very start or very end of the string.
                text = text
                    .replace(/ +/g, (match) => ' ' + nbspRun(match.length - 1))
                    .replace(/^ +/, (match) => nbspRun(match.length))
                    .replace(/ +$/, (match) => nbspRun(match.length));
            }
        }

        if (this._options.htmlEncodeNonAscii) {
            text = htmlEntityEncode(text);
        }

        return [text, areSymbolFontCodepoints];
    }

    /**
     * @returns {*} `undefined` while inside an `htmltag` destination,
     *   otherwise whatever the base implementation reports.
     */
    _getCurrentFont() {
        const destinations = this._state.allDestinations || {};
        if (destinations['htmltag']) {
            return undefined;
        }
        return super._getCurrentFont();
    }
}

exports.DeEncapsulate = DeEncapsulate;
exports.default = DeEncapsulate;