/*
 * rtf-stream-parser
 * Version: (not specified)
 * Stream Transform class to tokenize RTF, and another to de-encapsulate text or HTML.
 * Listing metadata: 119 lines (118 loc), 4.86 kB, JavaScript.
 */
;
Object.defineProperty(exports, "__esModule", { value: true });
exports.DeEncapsulate = void 0;
const checkVersion_1 = require("./features/checkVersion");
const countTokens_1 = require("./features/countTokens");
const handleCharacterSet_1 = require("./features/handleCharacterSet");
const handleControlsAndDestinations_1 = require("./features/handleControlsAndDestinations");
const handleDeEncapsulation_1 = require("./features/handleDeEncapsulation");
const handleFonts_1 = require("./features/handleFonts");
const handleGroupState_1 = require("./features/handleGroupState");
const handleOutput_1 = require("./features/handleOutput");
const handleUnicodeSkip_1 = require("./features/handleUnicodeSkip");
const textEscapes_1 = require("./features/textEscapes");
const ProcessTokens_1 = require("./ProcessTokens");
// Default values for the options that DeEncapsulate adds on top of the base
// ProcessTokens options. The constructor merges them as:
// base defaults <- these defaults <- caller-supplied options.
const deEncExtraDefaultOptions = {
// When true, HTML-mode text emitted outside an `htmltag` destination is
// passed through htmlEntityEncode() before being returned.
htmlEncodeNonAscii: false,
// When true, the first `charset=...` value seen inside `htmltag` output is
// rewritten to UTF-8 and the original value is remembered.
htmlFixContentType: false,
// When true, spaces in HTML-mode text outside `htmltag` destinations are
// partially replaced with U+00A0 (see DeEncapsulate._getOutputAsString).
htmlPreserveSpaces: false,
// NOTE(review): `mode`, `prefix` and `outlookQuirksMode` are not read in this
// file; presumably they are consumed by the feature handlers — confirm there.
mode: 'either',
prefix: false,
outlookQuirksMode: false,
};
/**
 * Encode every non-ASCII character of a string as an HTML character reference.
 *
 * - U+00A0 (no-break space) becomes the named entity `&nbsp;`.
 * - Any other code point above U+007F becomes a hexadecimal numeric
 *   reference such as `&#xe9;`.
 * - ASCII characters (including `<`, `>` and `&`) are passed through untouched.
 *
 * The string is walked by code point, so characters outside the BMP yield a
 * single reference (e.g. `&#x1f600;`) rather than two surrogate references.
 *
 * @param {string} str - Text to encode.
 * @returns {string} The encoded text; the original string is returned as-is
 *   when it contains only ASCII characters.
 */
function htmlEntityEncode(str) {
    const pieces = [];
    let ascii = true;
    for (const char of str) {
        const codepoint = char.codePointAt(0);
        if (codepoint === 0xA0) {
            ascii = false;
            // Must be an entity: a literal space here would silently turn a
            // non-breaking space into an ordinary, collapsible one.
            pieces.push('&nbsp;');
        }
        else if (codepoint > 0x7F) {
            ascii = false;
            pieces.push('&#x' + codepoint.toString(16) + ';');
        }
        else {
            pieces.push(char);
        }
    }
    // Skip the join entirely for pure-ASCII input.
    return ascii ? str : pieces.join('');
}
// Matches a `charset=<name>"` fragment, case-insensitively. Capture groups:
// (1) the `charset=` prefix, (2) the charset name (word characters and `-`),
// (3) the double quote that must immediately follow the name.
// Not global, so String.prototype.replace only touches the first match.
const rxCharset = /(\bcharset=)([\w-]+)(")/i;
/**
 * Token processor that de-encapsulates text or HTML from RTF.
 *
 * Extends ProcessTokens with its own list of feature handlers and overrides
 * the output-string and current-font hooks so that, in HTML mode, content
 * inside `htmltag` destinations is passed through (optionally with its
 * charset declaration rewritten) while content outside them is escaped and
 * optionally space-preserved / entity-encoded.
 */
class DeEncapsulate extends ProcessTokens_1.ProcessTokens {
    /**
     * @param {object} [options] - Caller options; merged over the base
     *   ProcessTokens defaults and `deEncExtraDefaultOptions`.
     */
    constructor(options) {
        super(options);
        // Feature handlers, in the order they are listed here.
        this._featureHandlers = [
            countTokens_1.countTokens,
            checkVersion_1.checkVersion,
            handleGroupState_1.handleGroupState,
            handleUnicodeSkip_1.handleUnicodeSkip,
            handleControlsAndDestinations_1.handleControlsAndDestinations,
            handleCharacterSet_1.handleCharacterSet,
            handleFonts_1.handleFonts,
            handleDeEncapsulation_1.handleDeEncapsulation,
            handleOutput_1.handleOutput,
            textEscapes_1.handleTextEscapes,
        ];
        this._rootState = { uc: 1, groupDepth: 0, destDepth: 0, destGroupDepth: 0 };
        this._state = this._rootState;
        this._fromhtml = false;
        this._fromtext = false;
        // Set once the charset declaration has been rewritten, so the
        // rewrite happens at most one time per document.
        this._didHtmlCharsetReplace = false;
        // Later sources win: base defaults < de-encapsulation defaults < caller options.
        this._options = Object.assign({}, ProcessTokens_1.procTokensDefaultOptions, deEncExtraDefaultOptions, options);
    }
    /** @returns {boolean} Current value of the `_fromhtml` flag. */
    get isHtml() {
        return this._fromhtml;
    }
    /** @returns {boolean} Current value of the `_fromtext` flag. */
    get isText() {
        return this._fromtext;
    }
    /**
     * @returns {string|undefined} The charset name that was replaced by
     *   `UTF-8` (only set when `htmlFixContentType` triggered a rewrite).
     */
    get originalHtmlCharset() {
        return this._originalHtmlCharset;
    }
    /**
     * Convert output data to a string via the base class, then apply the
     * HTML-mode post-processing.
     *
     * Inside an `htmltag` destination the text is left as-is, except that
     * with `htmlFixContentType` the first `charset=...` value is rewritten
     * to `UTF-8`. Outside `htmltag`, `<` and `>` are escaped, and the
     * `htmlPreserveSpaces` / `htmlEncodeNonAscii` options are applied.
     *
     * @param {*} data - Forwarded unchanged to the base implementation.
     * @param {*} font - Forwarded unchanged to the base implementation.
     * @returns {Array} `[outStr, areSymbolFontCodepoints]`, where the second
     *   element is exactly what the base implementation returned.
     */
    _getOutputAsString(data, font) {
        let [outStr, areSymbolFontCodepoints] = super._getOutputAsString(data, font);
        if (!this._fromhtml) {
            return [outStr, areSymbolFontCodepoints];
        }
        const allDests = this._state.allDestinations || {};
        const insideHtmltag = !!allDests['htmltag'];
        if (insideHtmltag) {
            if (this._options.htmlFixContentType && !this._didHtmlCharsetReplace) {
                // The callback only runs on a match, so the flag stays
                // false until a charset declaration is actually found.
                outStr = outStr.replace(rxCharset, (_match, pre, charset, post) => {
                    this._didHtmlCharsetReplace = true;
                    this._originalHtmlCharset = charset;
                    return pre + 'UTF-8' + post;
                });
            }
        }
        else {
            // Escape angle brackets so this text cannot be read as markup.
            outStr = outStr.replace(/<|>/g, match => {
                return match === '<' ? '&lt;' : '&gt;';
            });
            if (this._options.htmlPreserveSpaces) {
                if (outStr === ' ') {
                    outStr = '\u00A0';
                }
                else {
                    // Keep the first space of each run and turn the rest
                    // into U+00A0, then convert any leading and trailing
                    // spaces too.
                    outStr = outStr
                        .replace(/ +/g, match => ' ' + '\u00A0'.repeat(match.length - 1))
                        .replace(/^ +/, match => '\u00A0'.repeat(match.length))
                        .replace(/ +$/, match => '\u00A0'.repeat(match.length));
                }
            }
            if (this._options.htmlEncodeNonAscii) {
                outStr = htmlEntityEncode(outStr);
            }
        }
        return [outStr, areSymbolFontCodepoints];
    }
    /**
     * @returns {*} `undefined` while inside an `htmltag` destination;
     *   otherwise whatever the base implementation returns.
     */
    _getCurrentFont() {
        const allDests = this._state.allDestinations || {};
        const insideHtmltag = !!allDests['htmltag'];
        return insideHtmltag ? undefined : super._getCurrentFont();
    }
}
exports.DeEncapsulate = DeEncapsulate;
exports.default = DeEncapsulate;