UNPKG

rtf-stream-parser

Version:

Stream Transform class to tokenize RTF, and another to de-encapsulate text or HTML

github.com/mazira/rtf-stream-parser

mazira/rtf-stream-parser

193 lines (192 loc) • 6.59 kB

JavaScript

"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Tokenize = void 0;
const stream_1 = require("stream");
const utils_1 = require("./utils");

// Tokenizer states held in `_mode`. The numeric values are unchanged from the
// original literals.
const MODE_NORMAL = 0; // plain text / group delimiters
const MODE_CONTROL_START = 1; // a backslash has just been read
const MODE_CONTROL_WORD = 2; // reading the alphabetic part of a control word
const MODE_CONTROL_PARAM = 3; // reading the (optionally negative) numeric parameter
const MODE_BINARY = 4; // reading the raw bytes that follow `\bin<N>`
const MODE_HEX = 5; // reading the two hex digits that follow `\'`

// `type` values of the token objects pushed to the readable side.
const TOKEN_GROUP_START = 0; // emitted for '{'
const TOKEN_GROUP_END = 1; // emitted for '}'
const TOKEN_CONTROL = 2; // control word or control symbol
const TOKEN_TEXT = 3; // run of text bytes

// Byte values the tokenizer reacts to.
const BYTE_LF = 10;
const BYTE_CR = 13;
const BYTE_SPACE = 32;
const BYTE_HYPHEN = 45;
const BYTE_BACKSLASH = 92;
const BYTE_OPEN_BRACE = 123;
const BYTE_CLOSE_BRACE = 125;

// Initial (and minimum growth) size of a text token's backing buffer.
const TEXT_CHUNK_SIZE = 256;

/** @returns {boolean} true when byte `c` is an ASCII letter (A-Z or a-z). */
const isAlpha = (c) => (c > 64 && c < 91) || (c > 96 && c < 123);

/** @returns {boolean} true when byte `c` is an ASCII digit (0-9). */
const isNumeric = (c) => c > 47 && c < 58;

/**
 * Transform stream that consumes RTF bytes and emits token objects
 * (readable side is in object mode).
 *
 * Emitted token shapes, as built by this class:
 *   - `{ type: 0 }`                         for '{'
 *   - `{ type: 1 }`                         for '}'
 *   - `{ type: 2, word, param?, data? }`    for control words / symbols
 *   - `{ type: 3, data }`                   for text runs (`data` is a Buffer)
 */
class Tokenize extends stream_1.Transform {
    constructor() {
        super({ readableObjectMode: true });
        this._expectedBinaryBytes = 0;
        this._readHexDigitsCount = 0;
        this._paramStr = '';
        this._mode = MODE_NORMAL;
    }

    /**
     * Pushes the pending token (if any) downstream and resets the tokenizer
     * to its normal state.
     *
     * Before pushing, the numeric parameter is attached and the data buffer
     * is trimmed to the number of bytes actually written; the internal
     * `length` bookkeeping field is removed from the token.
     */
    _flushToken() {
        const token = this._token;
        if (token) {
            if (this._paramStr) {
                const param = Number.parseInt(this._paramStr, 10);
                // A lone '-' (control word followed by a hyphen and then a
                // non-digit) parses to NaN; leave `param` unset instead of
                // emitting NaN to consumers.
                if (!Number.isNaN(param)) {
                    token.param = param;
                }
            }
            const buf = token.data;
            if (buf) {
                if (buf.length > (token.length || 0)) {
                    // subarray() is the non-deprecated equivalent of
                    // Buffer#slice(): a view over the same memory.
                    token.data = buf.subarray(0, token.length);
                }
                delete token.length;
            }
            this.push(token);
        }
        this._token = null;
        this._readHexDigitsCount = 0;
        this._paramStr = '';
        this._mode = MODE_NORMAL;
    }

    /**
     * Called when a control word/symbol is complete. `\bin<N>` (N > 0) and
     * `\'` need to capture following bytes into the same token, so they
     * switch mode instead of flushing; everything else is flushed right away.
     */
    _handleSpecialOrPush() {
        const token = this._token;
        const param = Number.parseInt(this._paramStr || '0', 10) || 0;
        if (token.type === TOKEN_CONTROL && token.word === 'bin' && param > 0) {
            // NOTE(review): `param` comes straight from the input, so a hostile
            // document can request a very large allocation here. Consider
            // capping or growing lazily if input is untrusted.
            this._mode = MODE_BINARY;
            token.data = Buffer.alloc(param);
            token.length = 0;
        }
        else if (token.type === TOKEN_CONTROL && token.word === '\'') {
            this._mode = MODE_HEX;
            token.data = Buffer.alloc(1);
            token.length = 0;
            this._readHexDigitsCount = 0;
        }
        else {
            this._flushToken();
        }
    }

    /**
     * Feeds a single input byte through the state machine.
     *
     * @param {number} c - byte value (0-255)
     * @throws {Error} 'Unpushed token!' if a non-text token is pending in
     *   normal mode, or 'Unknown state!' if `_mode` is not a known state.
     */
    _handleByte(c) {
        switch (this._mode) {
            case MODE_BINARY: {
                const token = this._token;
                // utils_1.isNum is defined elsewhere; presumably a
                // `typeof === 'number'` guard.
                if (token.data && utils_1.isNum(token.length)) {
                    token.data[token.length++] = c;
                }
                // Flush once the pre-sized buffer is full (or the token is malformed).
                if (!utils_1.isNum(token.length) || !token.data || token.length >= token.data.length) {
                    this._flushToken();
                }
                break;
            }
            case MODE_HEX: {
                const token = this._token;
                const nibble = Number.parseInt(String.fromCharCode(c), 16);
                if (Number.isNaN(nibble) || !token.data) {
                    console.warn('Bad hex digit');
                }
                else if (this._readHexDigitsCount === 0) {
                    // first digit is the high nibble
                    token.data[0] += nibble * 16;
                }
                else {
                    token.data[0] += nibble;
                }
                this._readHexDigitsCount++;
                // Bad digits still count, so exactly two bytes are consumed.
                if (this._readHexDigitsCount === 2) {
                    token.length = 1;
                    this._flushToken();
                }
                break;
            }
            case MODE_CONTROL_START: {
                this._token = { type: TOKEN_CONTROL, word: String.fromCharCode(c) };
                if (isAlpha(c)) {
                    // Start of a control word; keep accumulating letters.
                    this._mode = MODE_CONTROL_WORD;
                }
                else {
                    // Single-character control symbol (e.g. `\'`, `\\`, `\{`).
                    this._handleSpecialOrPush();
                }
                break;
            }
            case MODE_CONTROL_WORD: {
                const token = this._token;
                if (isAlpha(c)) {
                    token.word += String.fromCharCode(c);
                }
                else if (isNumeric(c) || c === BYTE_HYPHEN) {
                    this._mode = MODE_CONTROL_PARAM;
                    this._paramStr = String.fromCharCode(c);
                }
                else {
                    this._handleSpecialOrPush();
                    // A single space delimiter is consumed; any other byte is
                    // re-processed in whatever mode we are in now.
                    if (c !== BYTE_SPACE) {
                        this._handleByte(c);
                    }
                }
                break;
            }
            case MODE_CONTROL_PARAM: {
                if (isNumeric(c)) {
                    this._paramStr += String.fromCharCode(c);
                }
                else {
                    this._handleSpecialOrPush();
                    if (c !== BYTE_SPACE) {
                        this._handleByte(c);
                    }
                }
                break;
            }
            case MODE_NORMAL: {
                switch (c) {
                    case BYTE_OPEN_BRACE:
                        this._flushToken();
                        this.push({ type: TOKEN_GROUP_START });
                        break;
                    case BYTE_CLOSE_BRACE:
                        this._flushToken();
                        this.push({ type: TOKEN_GROUP_END });
                        break;
                    case BYTE_BACKSLASH:
                        this._flushToken();
                        this._mode = MODE_CONTROL_START;
                        break;
                    case BYTE_CR:
                    case BYTE_LF:
                        // Bare CR / LF in normal mode are skipped.
                        break;
                    default: {
                        const token = this._token;
                        if (!token) {
                            this._token = { type: TOKEN_TEXT, data: Buffer.alloc(TEXT_CHUNK_SIZE), length: 1 };
                            this._token.data[0] = c;
                        }
                        else if (token.type === TOKEN_TEXT) {
                            if (token.length >= token.data.length) {
                                // Double the capacity rather than adding a
                                // fixed chunk, so long text runs are copied
                                // O(n) in total instead of O(n^2). The excess
                                // is trimmed in _flushToken().
                                const extra = Math.max(token.data.length, TEXT_CHUNK_SIZE);
                                token.data = Buffer.concat([token.data, Buffer.alloc(extra)]);
                            }
                            token.data[token.length++] = c;
                        }
                        else {
                            throw new Error('Unpushed token!');
                        }
                    }
                }
                break;
            }
            default:
                throw new Error('Unknown state!');
        }
    }

    /**
     * stream.Transform hook: tokenizes one chunk byte by byte.
     * Errors thrown while tokenizing are reported through `cb`.
     *
     * @param {Buffer|string} chunk
     * @param {string} encoding - used only when `chunk` is a string
     * @param {Function} cb
     */
    _transform(chunk, encoding, cb) {
        try {
            const buf = utils_1.isStr(chunk) ? Buffer.from(chunk, encoding) : chunk;
            for (let i = 0; i < buf.length; i++) {
                this._handleByte(buf[i]);
            }
        }
        catch (err) {
            return cb(err);
        }
        cb();
    }

    /**
     * stream.Transform hook: emits whatever token is still pending at end of
     * input.
     *
     * @param {Function} cb
     */
    _flush(cb) {
        this._flushToken();
        cb();
    }
}
exports.Tokenize = Tokenize;
exports.default = Tokenize;