// rtf-stream-parser
// Stream Transform class to tokenize RTF, and another to de-encapsulate text or HTML.
// Listing metadata: 193 lines (192 loc), 6.59 kB, JavaScript; version not given.
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.Tokenize = void 0;
const stream_1 = require("stream");
const utils_1 = require("./utils");
/**
 * Tells whether a byte value is an ASCII letter.
 *
 * @param {number} c - Byte / character code to classify.
 * @returns {boolean} `true` for codes strictly between 64 and 91 ('A'..'Z')
 *     or strictly between 96 and 123 ('a'..'z').
 */
const isAlpha = (c) => {
    // Upper-case range first; anything else must fall in the lower-case range.
    if (c > 64 && c < 91) {
        return true;
    }
    return c > 96 && c < 123;
};
/**
 * Tells whether a byte value is an ASCII decimal digit.
 *
 * @param {number} c - Byte / character code to classify.
 * @returns {boolean} `true` for codes strictly between 47 and 58 ('0'..'9').
 */
const isNumeric = (c) => {
    if (c > 47) {
        return c < 58;
    }
    return false;
};
/**
 * Tokenizer states stored in `Tokenize#_mode`.
 */
const Mode = Object.freeze({
    NORMAL: 0,        // outside any control sequence: text bytes and group braces
    CONTROL_START: 1, // the previous byte was a backslash
    CONTROL_WORD: 2,  // accumulating the letters of a control word
    CONTROL_PARAM: 3, // accumulating the numeric parameter of a control word
    BINARY: 4,        // consuming the raw payload announced by `\binN`
    HEX: 5,           // consuming the two hex digits that follow `\'`
});

/**
 * Values of the `type` field on emitted tokens.
 */
const TokenType = Object.freeze({
    GROUP_START: 0, // emitted for `{`
    GROUP_END: 1,   // emitted for `}`
    CONTROL: 2,     // control word or control symbol; carries `word`, optionally `param` / `data`
    TEXT: 3,        // run of plain bytes; carries `data`
});

/** Initial capacity, in bytes, of the buffer backing a text token. */
const TEXT_BUFFER_INITIAL_SIZE = 256;

/**
 * Upper bound, in bytes, on what is allocated up front for a `\binN` payload.
 * N comes straight from the input, so the buffer is grown as bytes actually
 * arrive instead of trusting N for one big allocation.
 */
const BINARY_BUFFER_INITIAL_LIMIT = 64 * 1024;

/**
 * Transform stream that reads RTF bytes and emits token objects
 * (readable side is in object mode).
 *
 * Emitted token shapes:
 *   - `{ type: 0 }` for `{` and `{ type: 1 }` for `}`
 *   - `{ type: 2, word, param?, data? }` for control words / symbols; `data`
 *     is present for `\binN` (the payload) and `\'hh` (the decoded byte)
 *   - `{ type: 3, data }` for runs of text bytes (CR and LF are skipped)
 */
class Tokenize extends stream_1.Transform {
    constructor() {
        super({ readableObjectMode: true });
        this._token = null;
        this._expectedBinaryBytes = 0;
        this._readHexDigitsCount = 0;
        this._paramStr = '';
        this._mode = Mode.NORMAL;
    }

    /**
     * Pushes the token under construction (if any) downstream and resets the
     * tokenizer to its normal state.
     *
     * The numeric parameter is attached only when it actually parses: a lone
     * `-` after a control word yields no `param` instead of `NaN`. The data
     * buffer is trimmed to the number of bytes really written, and the
     * internal `length` bookkeeping field is removed before the token is pushed.
     */
    _flushToken() {
        const token = this._token;
        if (token) {
            if (this._paramStr) {
                const param = Number.parseInt(this._paramStr, 10);
                if (!Number.isNaN(param)) {
                    token.param = param;
                }
            }
            const buf = token.data;
            if (buf) {
                if (buf.length > (token.length || 0)) {
                    // subarray is the non-deprecated equivalent of Buffer#slice.
                    token.data = buf.subarray(0, token.length);
                }
                delete token.length;
            }
            this.push(token);
        }
        this._token = null;
        this._expectedBinaryBytes = 0;
        this._readHexDigitsCount = 0;
        this._paramStr = '';
        this._mode = Mode.NORMAL;
    }

    /**
     * Called when a control word/symbol is complete. `\binN` (N > 0) and `\'`
     * need to consume following bytes, so they switch state instead of being
     * pushed right away; everything else is flushed immediately.
     */
    _handleSpecialOrPush() {
        const token = this._token;
        const param = Number.parseInt(this._paramStr || '0', 10) || 0;
        const isControl = token.type === TokenType.CONTROL;
        if (isControl && token.word === 'bin' && param > 0) {
            this._mode = Mode.BINARY;
            this._expectedBinaryBytes = param;
            // Cap the up-front allocation; _appendBinaryByte grows it on demand.
            token.data = Buffer.alloc(Math.min(param, BINARY_BUFFER_INITIAL_LIMIT));
            token.length = 0;
        }
        else if (isControl && token.word === '\'') {
            this._mode = Mode.HEX;
            token.data = Buffer.alloc(1);
            token.length = 0;
            this._readHexDigitsCount = 0;
        }
        else {
            this._flushToken();
        }
    }

    /**
     * Appends one payload byte to the current `\binN` token and flushes the
     * token once the announced number of bytes has been read.
     *
     * @param {number} c - Payload byte.
     */
    _appendBinaryByte(c) {
        const token = this._token;
        if (token.length >= token.data.length) {
            // Double the capacity, but never beyond the announced payload size.
            const remaining = this._expectedBinaryBytes - token.data.length;
            const extra = Math.min(token.data.length, remaining);
            token.data = Buffer.concat([token.data, Buffer.alloc(extra)]);
        }
        token.data[token.length++] = c;
        if (token.length >= this._expectedBinaryBytes) {
            this._flushToken();
        }
    }

    /**
     * Appends one byte to the current text token, creating the token first if
     * none is in progress.
     *
     * @param {number} c - Text byte.
     * @throws {Error} If a non-text token is still pending.
     */
    _appendTextByte(c) {
        let token = this._token;
        if (!token) {
            token = {
                type: TokenType.TEXT,
                data: Buffer.alloc(TEXT_BUFFER_INITIAL_SIZE),
                length: 0,
            };
            this._token = token;
        }
        else if (token.type !== TokenType.TEXT) {
            throw new Error('Unpushed token!');
        }
        if (token.length >= token.data.length) {
            // Geometric growth keeps total copying linear in the run length.
            token.data = Buffer.concat([token.data, Buffer.alloc(token.data.length)]);
        }
        token.data[token.length++] = c;
    }

    /**
     * Feeds a single input byte through the state machine.
     *
     * @param {number} c - Input byte.
     * @throws {Error} On an unknown state or an unexpected pending token.
     */
    _handleByte(c) {
        switch (this._mode) {
            case Mode.BINARY: {
                this._appendBinaryByte(c);
                break;
            }
            case Mode.HEX: {
                const token = this._token;
                const nibble = Number.parseInt(String.fromCharCode(c), 16);
                if (Number.isNaN(nibble)) {
                    console.warn('Bad hex digit');
                }
                else if (this._readHexDigitsCount === 0) {
                    token.data[0] += nibble * 16; // high nibble
                }
                else {
                    token.data[0] += nibble; // low nibble
                }
                this._readHexDigitsCount++;
                if (this._readHexDigitsCount === 2) {
                    token.length = 1;
                    this._flushToken();
                }
                break;
            }
            case Mode.CONTROL_START: {
                this._token = {
                    type: TokenType.CONTROL,
                    word: String.fromCharCode(c),
                };
                if (isAlpha(c)) {
                    // First letter of a control word; keep reading.
                    this._mode = Mode.CONTROL_WORD;
                }
                else {
                    // Single-character control symbol: complete already.
                    this._handleSpecialOrPush();
                }
                break;
            }
            case Mode.CONTROL_WORD: {
                if (isAlpha(c)) {
                    this._token.word += String.fromCharCode(c);
                }
                else if (isNumeric(c) || c === 45) {
                    // A digit or '-' (45) starts the numeric parameter.
                    this._mode = Mode.CONTROL_PARAM;
                    this._paramStr = String.fromCharCode(c);
                }
                else {
                    this._handleSpecialOrPush();
                    // A single space (32) is a delimiter and is swallowed;
                    // any other byte is re-processed in the new state.
                    if (c !== 32) {
                        this._handleByte(c);
                    }
                }
                break;
            }
            case Mode.CONTROL_PARAM: {
                if (isNumeric(c)) {
                    this._paramStr += String.fromCharCode(c);
                }
                else {
                    this._handleSpecialOrPush();
                    if (c !== 32) {
                        this._handleByte(c);
                    }
                }
                break;
            }
            case Mode.NORMAL: {
                switch (c) {
                    case 123: // '{'
                        this._flushToken();
                        this.push({ type: TokenType.GROUP_START });
                        break;
                    case 125: // '}'
                        this._flushToken();
                        this.push({ type: TokenType.GROUP_END });
                        break;
                    case 92: // '\'
                        this._flushToken();
                        this._mode = Mode.CONTROL_START;
                        break;
                    case 13: // CR
                    case 10: // LF
                        // Skipped without ending the current text run.
                        break;
                    default:
                        this._appendTextByte(c);
                }
                break;
            }
            default:
                throw new Error('Unknown state!');
        }
    }

    /**
     * stream.Transform hook: tokenizes one chunk byte by byte. Any error
     * thrown while tokenizing is handed to the callback.
     *
     * @param {Buffer|string} chunk - Incoming data.
     * @param {string} encoding - Encoding used when `chunk` is a string.
     * @param {Function} cb - Completion callback.
     */
    _transform(chunk, encoding, cb) {
        try {
            const buf = utils_1.isStr(chunk) ? Buffer.from(chunk, encoding) : chunk;
            for (let i = 0; i < buf.length; i++) {
                this._handleByte(buf[i]);
            }
        }
        catch (err) {
            return cb(err);
        }
        cb();
    }

    /**
     * stream.Transform hook: emits whatever token is still pending at end of
     * input.
     *
     * @param {Function} cb - Completion callback.
     */
    _flush(cb) {
        this._flushToken();
        cb();
    }
}
// Expose the tokenizer both as the named export `Tokenize` and as the
// module's `default` export.
exports.Tokenize = Tokenize;
exports.default = Tokenize;