@bbob/parser
Version:
Parses BBCode into an AST array. Part of the @bbob BBCode parser.
270 lines (216 loc) • 7.75 kB
JavaScript
"use strict";
exports.__esModule = true;
exports.createLexer = createLexer;
exports.createTokenOfType = void 0;
var _char9 = require("@bbob/plugin-helper/lib/char");
var _Token = require("./Token");
var _utils = require("./utils");
/* eslint-disable no-plusplus,no-param-reassign */
// Exclamation mark. It is one of the lexer's reserved characters, so an open
// tag char directly followed by '!' is emitted as plain text instead of being
// parsed as a tag (for cases like <!-- -->).
var EM = '!';
/**
 * Token factory: wraps the given data in a new Token instance.
 * A position argument that is `undefined` (i.e. omitted) falls back to 0;
 * any other value, including `null`, is passed through untouched.
 * @param {String} type token type
 * @param {String} value token text
 * @param {Number} r line number (defaults to 0)
 * @param {Number} cl char number in line (defaults to 0)
 */
var createToken = function createToken(type, value, r, cl) {
  var line = r === undefined ? 0 : r;
  var column = cl === undefined ? 0 : cl;
  return new _Token.Token(type, value, line, column);
};
/**
* @typedef {Object} Lexer
* @property {Function} tokenize
* @property {Function} isTokenNested
*/
/**
 * Creates a lexer over `buffer` that splits BBCode-like text into tokens:
 * new lines, whitespace runs, words, tags, attribute names and attribute values.
 *
 * Every token is created with the `row`/`col` counters as they are at the
 * moment of emission, i.e. after the token's characters have been consumed.
 *
 * @param {String} buffer text to tokenize
 * @param {Object} options
 * @param {Function} options.onToken optional callback invoked with each token as it is emitted
 * @param {String} options.openTag tag opening char, falls back to OPEN_BRAKET when falsy
 * @param {String} options.closeTag tag closing char, falls back to CLOSE_BRAKET when falsy
 * @param {Boolean} options.enableEscapeTags when truthy, a backslash followed by
 *   openTag, closeTag or another backslash emits that following char as a word
 * @return {Lexer}
 */
function createLexer(buffer, options) {
  if (options === void 0) {
    options = {};
  }
  var row = 0;
  var col = 0;
  var tokenIndex = -1;
  // Preallocated with one slot per input char; trimmed to the real count in tokenize().
  var tokens = new Array(Math.floor(buffer.length));
  var openTag = options.openTag || _char9.OPEN_BRAKET;
  var closeTag = options.closeTag || _char9.CLOSE_BRAKET;
  var escapeTags = options.enableEscapeTags;
  // If one of these chars directly follows openTag, the openTag is emitted as a plain word.
  var RESERVED_CHARS = [closeTag, openTag, _char9.QUOTEMARK, _char9.BACKSLASH, _char9.SPACE, _char9.TAB, _char9.EQ, _char9.N, EM];
  // Chars that can never be part of a word token.
  var NOT_CHAR_TOKENS = [openTag, _char9.SPACE, _char9.TAB, _char9.N];
  var WHITESPACES = [_char9.SPACE, _char9.TAB];
  // Attribute separators; accepted as regular chars only inside a quoted value.
  var SPECIAL_CHARS = [_char9.EQ, _char9.SPACE, _char9.TAB];
  var isCharReserved = function isCharReserved(ch) {
    return RESERVED_CHARS.indexOf(ch) >= 0;
  };
  var isWhiteSpace = function isWhiteSpace(ch) {
    return WHITESPACES.indexOf(ch) >= 0;
  };
  var isCharToken = function isCharToken(ch) {
    return NOT_CHAR_TOKENS.indexOf(ch) === -1;
  };
  var isSpecialChar = function isSpecialChar(ch) {
    return SPECIAL_CHARS.indexOf(ch) >= 0;
  };
  var isEscapableChar = function isEscapableChar(ch) {
    return ch === openTag || ch === closeTag || ch === _char9.BACKSLASH;
  };
  var isEscapeChar = function isEscapeChar(ch) {
    return ch === _char9.BACKSLASH;
  };
  /**
   * Reports the token to `options.onToken` (when provided) and then appends
   * it to the internal token list.
   * @param token
   */
  var emitToken = function emitToken(token) {
    if (options.onToken) {
      options.onToken(token);
    }
    tokenIndex += 1;
    tokens[tokenIndex] = token;
  };
  /**
   * Parses params inside [myTag---params goes here---]content[/myTag]
   * The first chunk (up to '=' or whitespace) becomes the tag name; every
   * following chunk becomes either an attribute name or an attribute value token.
   * All attribute tokens get the lexer's current row/col.
   * @param str tag content without the surrounding open/close chars
   * @returns {{tag: *, attrs: Array}}
   */
  var parseAttrs = function parseAttrs(str) {
    var tagName = null;
    // true while the cursor is inside a quoted attribute value
    var skipSpecialChars = false;
    var attrTokens = [];
    var attrCharGrabber = (0, _utils.createCharGrabber)(str);
    // Predicate for grabWhile: answers "does this char still belong to the current chunk?"
    var validAttr = function validAttr(ch) {
      var isEQ = ch === _char9.EQ;
      var isWS = isWhiteSpace(ch);
      var prevChar = attrCharGrabber.getPrev();
      var nextChar = attrCharGrabber.getNext();
      var isPrevSLASH = prevChar === _char9.BACKSLASH;
      var isTagNameEmpty = tagName === null;
      if (isTagNameEmpty) {
        // the tag name ends at '=', at whitespace, or when the grabber reports isLast()
        return (isEQ || isWS || attrCharGrabber.isLast()) === false;
      }
      if (skipSpecialChars && isSpecialChar(ch)) {
        // '=' and whitespace are ordinary chars inside quotes
        return true;
      }
      if (ch === _char9.QUOTEMARK && !isPrevSLASH) {
        // a quote not preceded by a backslash opens or closes a quoted value
        skipSpecialChars = !skipSpecialChars;
        // a closing quote ends the chunk unless '=' or whitespace follows it
        if (!skipSpecialChars && !(nextChar === _char9.EQ || isWhiteSpace(nextChar))) {
          return false;
        }
      }
      return (isEQ || isWS) === false;
    };
    var nextAttr = function nextAttr() {
      var attrStr = attrCharGrabber.grabWhile(validAttr);
      // the char the grab stopped on decides how the chunk is classified
      var currChar = attrCharGrabber.getCurr();
      if (tagName === null) {
        // first string before space is a tag name [tagName params...]
        tagName = attrStr;
      } else if (isWhiteSpace(currChar) || currChar === _char9.QUOTEMARK || !attrCharGrabber.hasNext()) {
        // stopped on whitespace, a quote, or the end of input: the chunk is a value
        var escaped = (0, _utils.unquote)((0, _utils.trimChar)(attrStr, _char9.QUOTEMARK));
        attrTokens.push(createToken(_Token.TYPE_ATTR_VALUE, escaped, row, col));
      } else {
        // any other stop char: the chunk is an attribute name
        attrTokens.push(createToken(_Token.TYPE_ATTR_NAME, attrStr, row, col));
      }
      // step over the char the grab stopped on
      attrCharGrabber.skip();
    };
    while (attrCharGrabber.hasNext()) {
      nextAttr();
    }
    return {
      tag: tagName,
      attrs: attrTokens
    };
  };
  // The column counter is advanced from the grabber's onSkip callback
  // (presumably fired each time the grabber moves forward - confirm in ./utils).
  var bufferGrabber = (0, _utils.createCharGrabber)(buffer, {
    onSkip: function onSkip() {
      col++;
    }
  });
  /**
   * Consumes input at the current position and emits the matching token(s).
   * Branch order matters: new line, whitespace, escape sequence, open tag,
   * close tag, and finally plain word chars.
   */
  var next = function next() {
    var currChar = bufferGrabber.getCurr();
    var nextChar = bufferGrabber.getNext();
    if (currChar === _char9.N) {
      bufferGrabber.skip();
      // counters are moved first, so the NEW_LINE token carries the new row and col 0
      col = 0;
      row++;
      emitToken(createToken(_Token.TYPE_NEW_LINE, currChar, row, col));
    } else if (isWhiteSpace(currChar)) {
      var spaces = bufferGrabber.grabWhile(isWhiteSpace);
      emitToken(createToken(_Token.TYPE_SPACE, spaces, row, col));
    } else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) {
      bufferGrabber.skip(); // skip the \ without emitting anything
      bufferGrabber.skip(); // skip past the [, ] or \ as well
      emitToken(createToken(_Token.TYPE_WORD, nextChar, row, col));
    } else if (currChar === openTag) {
      bufferGrabber.skip(); // skip openTag
      // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
      var substr = bufferGrabber.substrUntilChar(closeTag);
      var hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
      // nextChar was read before the skip above, so it is the char right after openTag
      if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
        // not a real tag: emit the openTag char itself as text
        emitToken(createToken(_Token.TYPE_WORD, currChar, row, col));
      } else {
        var tagBody = bufferGrabber.grabWhile(function (val) {
          return val !== closeTag;
        });
        bufferGrabber.skip(); // skip closeTag
        // [myTag ]
        var isNoAttrsInTag = tagBody.indexOf(_char9.EQ) === -1;
        // [/myTag]
        var isClosingTag = tagBody[0] === _char9.SLASH;
        if (isNoAttrsInTag || isClosingTag) {
          emitToken(createToken(_Token.TYPE_TAG, tagBody, row, col));
        } else {
          var parsed = parseAttrs(tagBody);
          emitToken(createToken(_Token.TYPE_TAG, parsed.tag, row, col));
          // emitted only for the side effect, hence forEach rather than map
          parsed.attrs.forEach(emitToken);
        }
      }
    } else if (currChar === closeTag) {
      bufferGrabber.skip(); // skip closeTag
      emitToken(createToken(_Token.TYPE_WORD, currChar, row, col));
    } else if (isCharToken(currChar)) {
      if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) {
        // a backslash that escapes nothing is emitted as a one-char word
        bufferGrabber.skip();
        emitToken(createToken(_Token.TYPE_WORD, currChar, row, col));
      } else {
        var word = bufferGrabber.grabWhile(function (ch) {
          if (escapeTags) {
            // stop in front of a backslash so the escape branches can inspect it
            return isCharToken(ch) && !isEscapeChar(ch);
          }
          return isCharToken(ch);
        });
        emitToken(createToken(_Token.TYPE_WORD, word, row, col));
      }
    }
  };
  /**
   * Runs the lexer until the input is exhausted.
   * @returns {Array} the token list, trimmed to the number of emitted tokens
   */
  var tokenize = function tokenize() {
    while (bufferGrabber.hasNext()) {
      next();
    }
    tokens.length = tokenIndex + 1;
    return tokens;
  };
  /**
   * Checks whether the raw buffer contains openTag + '/' + token value
   * anywhere, i.e. whether something looking like a closing tag for the
   * token exists in the input.
   * @param token object exposing getValue()
   * @returns {Boolean}
   */
  var isTokenNested = function isTokenNested(token) {
    var value = openTag + _char9.SLASH + token.getValue(); // potential bottleneck
    return buffer.indexOf(value) > -1;
  };
  return {
    tokenize: tokenize,
    isTokenNested: isTokenNested
  };
}
// Public alias: the token factory is exported under the name `createTokenOfType`.
var createTokenOfType = createToken;
exports.createTokenOfType = createTokenOfType;