@bbob/parser
Version:
Just parses BBcode to AST array. Part of @bbob bbcode parser
390 lines (301 loc) • 9.79 kB
JavaScript
"use strict";
exports.__esModule = true;
exports.createLexer = createLexer;
exports.createTokenOfType = void 0;
var _char14 = require("@bbob/plugin-helper/lib/char");
var _Token = require("./Token");
var _utils = require("./utils");
/* eslint-disable no-plusplus,no-param-reassign */
// for cases <!-- -->
var EM = '!';
/**
* Creates a Token entity class
* @param {Number} type
* @param {String} value
* @param {Number} r line number
* @param {Number} cl char number in line
*/
var createToken = function createToken(type, value, r, cl) {
if (r === void 0) {
r = 0;
}
if (cl === void 0) {
cl = 0;
}
return new _Token.Token(type, value, r, cl);
};
/**
* @typedef {Object} Lexer
* @property {Function} tokenize
* @property {Function} isTokenNested
*/
/**
* @param {String} buffer
* @param {Object} options
* @param {Function} options.onToken
* @param {String} options.openTag
* @param {String} options.closeTag
* @param {Boolean} options.enableEscapeTags
* @return {Lexer}
*/
function createLexer(buffer, options) {
if (options === void 0) {
options = {};
}
var STATE_WORD = 0;
var STATE_TAG = 1;
var STATE_TAG_ATTRS = 2;
var TAG_STATE_NAME = 0;
var TAG_STATE_ATTR = 1;
var TAG_STATE_VALUE = 2;
var row = 0;
var col = 0;
var tokenIndex = -1;
var stateMode = STATE_WORD;
var tagMode = TAG_STATE_NAME;
var tokens = new Array(Math.floor(buffer.length));
var openTag = options.openTag || _char14.OPEN_BRAKET;
var closeTag = options.closeTag || _char14.CLOSE_BRAKET;
var escapeTags = !!options.enableEscapeTags;
var onToken = options.onToken || function () {};
var RESERVED_CHARS = [closeTag, openTag, _char14.QUOTEMARK, _char14.BACKSLASH, _char14.SPACE, _char14.TAB, _char14.EQ, _char14.N, EM];
var NOT_CHAR_TOKENS = [// ...(options.enableEscapeTags ? [BACKSLASH] : []),
openTag, _char14.SPACE, _char14.TAB, _char14.N];
var WHITESPACES = [_char14.SPACE, _char14.TAB];
var SPECIAL_CHARS = [_char14.EQ, _char14.SPACE, _char14.TAB];
var isCharReserved = function isCharReserved(_char) {
return RESERVED_CHARS.indexOf(_char) >= 0;
};
var isNewLine = function isNewLine(_char2) {
return _char2 === _char14.N;
};
var isWhiteSpace = function isWhiteSpace(_char3) {
return WHITESPACES.indexOf(_char3) >= 0;
};
var isCharToken = function isCharToken(_char4) {
return NOT_CHAR_TOKENS.indexOf(_char4) === -1;
};
var isSpecialChar = function isSpecialChar(_char5) {
return SPECIAL_CHARS.indexOf(_char5) >= 0;
};
var isEscapableChar = function isEscapableChar(_char6) {
return _char6 === openTag || _char6 === closeTag || _char6 === _char14.BACKSLASH;
};
var isEscapeChar = function isEscapeChar(_char7) {
return _char7 === _char14.BACKSLASH;
};
var onSkip = function onSkip() {
col++;
};
var unq = function unq(val) {
return (0, _utils.unquote)((0, _utils.trimChar)(val, _char14.QUOTEMARK));
};
var chars = (0, _utils.createCharGrabber)(buffer, {
onSkip: onSkip
});
/**
* Emits newly created token to subscriber
* @param {Number} type
* @param {String} value
*/
function emitToken(type, value) {
var token = createToken(type, value, row, col);
onToken(token);
tokenIndex += 1;
tokens[tokenIndex] = token;
}
function nextTagState(tagChars, isSingleValueTag) {
if (tagMode === TAG_STATE_NAME) {
var currChar = tagChars.getCurr();
var hasNext = tagChars.hasNext();
var isWS = isWhiteSpace(currChar);
var isQM = currChar === _char14.QUOTEMARK;
if (isWS || isQM || !hasNext) {
return TAG_STATE_VALUE;
}
var validName = function validName(_char8) {
return !(_char8 === _char14.EQ || isWhiteSpace(_char8) || tagChars.isLast());
};
var name = tagChars.grabWhile(validName);
emitToken(_Token.TYPE_TAG, name);
tagChars.skip(); // in cases when we has [url=someval]GET[/url] and we dont need to parse all
if (isSingleValueTag) {
return TAG_STATE_VALUE;
}
var hasEQ = tagChars.includes(_char14.EQ);
return hasEQ ? TAG_STATE_ATTR : TAG_STATE_VALUE;
}
if (tagMode === TAG_STATE_ATTR) {
var validAttrName = function validAttrName(_char9) {
return !(_char9 === _char14.EQ || isWhiteSpace(_char9));
};
var _name = tagChars.grabWhile(validAttrName);
var isEnd = tagChars.isLast();
var isValue = tagChars.getCurr() !== _char14.EQ;
tagChars.skip();
if (isEnd || isValue) {
emitToken(_Token.TYPE_ATTR_VALUE, unq(_name));
} else {
emitToken(_Token.TYPE_ATTR_NAME, _name);
}
if (isEnd) {
return TAG_STATE_NAME;
}
if (isValue) {
return TAG_STATE_ATTR;
}
return TAG_STATE_VALUE;
}
if (tagMode === TAG_STATE_VALUE) {
var stateSpecial = false;
var validAttrValue = function validAttrValue(_char10) {
// const isEQ = char === EQ;
var isQM = _char10 === _char14.QUOTEMARK;
var prevChar = tagChars.getPrev();
var nextChar = tagChars.getNext();
var isPrevSLASH = prevChar === _char14.BACKSLASH;
var isNextEQ = nextChar === _char14.EQ;
var isWS = isWhiteSpace(_char10); // const isPrevWS = isWhiteSpace(prevChar);
var isNextWS = isWhiteSpace(nextChar);
if (stateSpecial && isSpecialChar(_char10)) {
return true;
}
if (isQM && !isPrevSLASH) {
stateSpecial = !stateSpecial;
if (!stateSpecial && !(isNextEQ || isNextWS)) {
return false;
}
}
if (!isSingleValueTag) {
return isWS === false; // return (isEQ || isWS) === false;
}
return true;
};
var _name2 = tagChars.grabWhile(validAttrValue);
tagChars.skip();
emitToken(_Token.TYPE_ATTR_VALUE, unq(_name2));
if (tagChars.isLast()) {
return TAG_STATE_NAME;
}
return TAG_STATE_ATTR;
}
return TAG_STATE_NAME;
}
function stateTag() {
var currChar = chars.getCurr();
if (currChar === openTag) {
var nextChar = chars.getNext();
chars.skip(); // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
var substr = chars.substrUntilChar(closeTag);
var hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
if (isCharReserved(nextChar) || hasInvalidChars || chars.isLast()) {
emitToken(_Token.TYPE_WORD, currChar);
return STATE_WORD;
} // [myTag ]
var isNoAttrsInTag = substr.indexOf(_char14.EQ) === -1; // [/myTag]
var isClosingTag = substr[0] === _char14.SLASH;
if (isNoAttrsInTag || isClosingTag) {
var name = chars.grabWhile(function (_char11) {
return _char11 !== closeTag;
});
chars.skip(); // skip closeTag
emitToken(_Token.TYPE_TAG, name);
return STATE_WORD;
}
return STATE_TAG_ATTRS;
}
if (currChar === closeTag) {
chars.skip();
emitToken(_Token.TYPE_WORD, currChar);
return STATE_WORD;
}
return STATE_WORD;
}
function stateAttrs() {
var silent = true;
var tagStr = chars.grabWhile(function (_char12) {
return _char12 !== closeTag;
}, silent);
var tagGrabber = (0, _utils.createCharGrabber)(tagStr, {
onSkip: onSkip
});
var hasSpace = tagGrabber.includes(_char14.SPACE);
while (tagGrabber.hasNext()) {
tagMode = nextTagState(tagGrabber, !hasSpace);
}
chars.skip(); // skip closeTag
return STATE_WORD;
}
function stateWord() {
if (isNewLine(chars.getCurr())) {
emitToken(_Token.TYPE_NEW_LINE, chars.getCurr());
chars.skip();
col = 0;
row++;
return STATE_WORD;
}
if (isWhiteSpace(chars.getCurr())) {
emitToken(_Token.TYPE_SPACE, chars.grabWhile(isWhiteSpace));
return STATE_WORD;
}
if (chars.getCurr() === openTag) {
if (chars.includes(closeTag)) {
return STATE_TAG;
}
emitToken(_Token.TYPE_WORD, chars.getCurr());
chars.skip();
return STATE_WORD;
}
if (escapeTags) {
if (isEscapeChar(chars.getCurr())) {
var currChar = chars.getCurr();
var nextChar = chars.getNext();
chars.skip(); // skip the \ without emitting anything
if (isEscapableChar(nextChar)) {
chars.skip(); // skip past the [, ] or \ as well
emitToken(_Token.TYPE_WORD, nextChar);
return STATE_WORD;
}
emitToken(_Token.TYPE_WORD, currChar);
return STATE_WORD;
}
var isChar = function isChar(_char13) {
return isCharToken(_char13) && !isEscapeChar(_char13);
};
emitToken(_Token.TYPE_WORD, chars.grabWhile(isChar));
return STATE_WORD;
}
emitToken(_Token.TYPE_WORD, chars.grabWhile(isCharToken));
return STATE_WORD;
}
function tokenize() {
while (chars.hasNext()) {
switch (stateMode) {
case STATE_TAG:
stateMode = stateTag();
break;
case STATE_TAG_ATTRS:
stateMode = stateAttrs();
break;
case STATE_WORD:
stateMode = stateWord();
break;
default:
stateMode = STATE_WORD;
break;
}
}
tokens.length = tokenIndex + 1;
return tokens;
}
function isTokenNested(token) {
var value = openTag + _char14.SLASH + token.getValue(); // potential bottleneck
return buffer.indexOf(value) > -1;
}
return {
tokenize: tokenize,
isTokenNested: isTokenNested
};
}
var createTokenOfType = createToken;
exports.createTokenOfType = createTokenOfType;