@bbob/parser
Version:
A BBCode to AST Parser part of @bbob
359 lines (358 loc) • 12.9 kB
JavaScript
/* eslint-disable no-plusplus,no-param-reassign */ "use strict";
// Mark `exports` with `__esModule: true`. Only `value` is supplied, so the
// property is created non-enumerable, non-writable and non-configurable.
Object.defineProperty(exports, "__esModule", {
value: true
});
/**
 * Defines one enumerable, getter-backed property on `target` for every
 * enumerable key found on `all`.
 *
 * @param {Object} target - object that receives the properties
 * @param {Object} all - map of property name to the getter function to install
 */
function _export(target, all) {
  var key;
  for (key in all) {
    var descriptor = {
      enumerable: true,
      get: all[key]
    };
    Object.defineProperty(target, key, descriptor);
  }
}
// Register the public API. Each entry is a getter function, so an exported
// property is resolved when it is read rather than copied at registration time.
_export(exports, {
createLexer: function() {
return createLexer;
},
createTokenOfType: function() {
return createTokenOfType;
}
});
var _pluginhelper = require("@bbob/plugin-helper");
var _Token = require("./Token.js");
var _utils = require("./utils.js");
// for cases <!-- -->
// EM is listed in the lexer's RESERVED_CHARS, so an open-tag character that is
// directly followed by "!" is emitted as a plain word instead of starting a tag.
var EM = "!";
/**
 * Creates a `Token`, replacing every position argument that is `undefined`
 * with 0 (other values, including `null`, are passed through unchanged).
 *
 * @param {*} type - token type, forwarded to the Token constructor
 * @param {*} value - token value, forwarded to the Token constructor
 * @param {number} [r=0] - third constructor argument (the lexer passes its row)
 * @param {number} [cl=0] - fourth constructor argument (the lexer passes its column)
 * @param {number} [p=0] - fifth constructor argument (the lexer passes a start position)
 * @param {number} [e=0] - sixth constructor argument (the lexer passes an end position)
 * @returns {Token} the newly constructed token
 */
function createTokenOfType(type, value, r, cl, p, e) {
  var orZero = function(given) {
    return given === void 0 ? 0 : given;
  };
  return new _Token.Token(type, value, orZero(r), orZero(cl), orZero(p), orZero(e));
}
// Top-level lexer modes; `tokenize` dispatches on these values.
var STATE_WORD = 0;
var STATE_TAG = 1;
var STATE_TAG_ATTRS = 2;
// Sub-modes used by `nextTagState` while lexing the inside of a tag that has attributes.
var TAG_STATE_NAME = 0;
var TAG_STATE_ATTR = 1;
var TAG_STATE_VALUE = 2;
// Characters accepted by `isWhiteSpace` (new lines are tested separately by `isNewLine`).
var WHITESPACES = [
_pluginhelper.SPACE,
_pluginhelper.TAB
];
// Characters accepted by `isSpecialChar`; `nextTagState` keeps them inside a quoted attribute value.
var SPECIAL_CHARS = [
_pluginhelper.EQ,
_pluginhelper.SPACE,
_pluginhelper.TAB
];
// Added to `startPos + name.length` in `stateTag` to compute a tag token's end position.
var END_POS_OFFSET = 2; // length + start position offset
/** Tells whether `char` is one of the entries of WHITESPACES. */
var isWhiteSpace = function(char) {
  return WHITESPACES.includes(char);
};
/** Tells whether `char` is the escape character (the plugin-helper BACKSLASH constant). */
var isEscapeChar = function(char) {
  var escapeChar = _pluginhelper.BACKSLASH;
  return escapeChar === char;
};
/** Tells whether `char` is one of the entries of SPECIAL_CHARS. */
var isSpecialChar = function(char) {
  return SPECIAL_CHARS.includes(char);
};
/** Tells whether `char` equals the plugin-helper `N` constant (presumably "\n" - confirm in plugin-helper). */
var isNewLine = function(char) {
  var lineBreak = _pluginhelper.N;
  return lineBreak === char;
};
/**
 * Normalises an attribute value: first `trimChar(val, QUOTEMARK)`, then
 * `unquote` on the result. Both helpers come from ./utils.js and are invoked
 * as plain functions (no `this`).
 *
 * @param {string} val - raw attribute text
 * @returns {*} whatever `unquote` returns for the trimmed text
 */
var unq = function(val) {
  var unquote = _utils.unquote;
  var trimChar = _utils.trimChar;
  var trimmed = trimChar(val, _pluginhelper.QUOTEMARK);
  return unquote(trimmed);
};
function createLexer(buffer, options) {
// `options` is optional; every setting below falls back to a default.
if (options === void 0) options = {};
// Position bookkeeping: `row` grows on every new-line token, `col` is bumped by
// `onSkip` and reset on a new line, `prevCol` is the column handed to the next
// emitted token and is re-synchronised with `col` after each emit.
var row = 0;
var prevCol = 0;
var col = 0;
// Index of the last token written into `tokens` (-1 while nothing was emitted).
var tokenIndex = -1;
// Current top-level mode (STATE_*) and current in-tag sub-mode (TAG_STATE_*).
var stateMode = STATE_WORD;
var tagMode = TAG_STATE_NAME;
// Lower-cased name of the context-free tag currently open, "" when there is none.
var contextFreeTag = "";
// Pre-sized to the input length; `tokenize` truncates it to the real token count.
var tokens = new Array(Math.floor(buffer.length));
// Tag delimiters, overridable through options.openTag / options.closeTag.
var openTag = options.openTag || _pluginhelper.OPEN_BRAKET;
var closeTag = options.closeTag || _pluginhelper.CLOSE_BRAKET;
// When true, `stateWord` treats a backslash before the open tag, the close tag or another backslash as an escape.
var escapeTags = !!options.enableEscapeTags;
// Names of context-free tags, with falsy entries dropped and the rest lower-cased.
var contextFreeTags = (options.contextFreeTags || []).filter(Boolean).map(function(tag) {
return tag.toLowerCase();
});
// When truthy, `isTokenNested` searches for the closing tag case-insensitively.
var caseFreeTags = options.caseFreeTags || false;
// Cache used by `isTokenNested`: closing-tag text -> whether it occurs in `buffer`.
var nestedMap = new Map();
// Subscriber called with every emitted token; a no-op when not provided.
var onToken = options.onToken || function() {};
// If the character right after the open tag is one of these, `stateTag` emits
// the open tag as a plain word instead of lexing a tag.
var RESERVED_CHARS = [
closeTag,
openTag,
_pluginhelper.QUOTEMARK,
_pluginhelper.BACKSLASH,
_pluginhelper.SPACE,
_pluginhelper.TAB,
_pluginhelper.EQ,
_pluginhelper.N,
EM
];
// Characters that `isCharToken` rejects, i.e. that end a word run in `stateWord`.
var NOT_CHAR_TOKENS = [
openTag,
_pluginhelper.SPACE,
_pluginhelper.TAB,
_pluginhelper.N
];
/** Tells whether `char` is one of the entries of RESERVED_CHARS. */
var isCharReserved = function(char) {
  return RESERVED_CHARS.includes(char);
};
/** Tells whether `char` may be part of a word, i.e. it is NOT listed in NOT_CHAR_TOKENS. */
var isCharToken = function(char) {
  return !NOT_CHAR_TOKENS.includes(char);
};
/** Tells whether `char` may follow an escape character: the open tag, the close tag or a backslash. */
var isEscapableChar = function(char) {
  if (char === openTag) {
    return true;
  }
  if (char === closeTag) {
    return true;
  }
  return char === _pluginhelper.BACKSLASH;
};
/** Skip callback handed to the char grabbers: advances the current column by one. */
var onSkip = function() {
  col += 1;
};
/**
 * Tracks entering and leaving a context-free tag.
 *
 * A closing tag seen while a context-free tag is open clears `contextFreeTag`.
 * Otherwise, when no context-free tag is open, `name` (compared lower-cased)
 * opens one if it is listed in `contextFreeTags` and its closing tag exists in
 * the input according to `isTokenNested`.
 *
 * @param {string} name - tag name exactly as it was lexed
 * @param {boolean} [isClosingTag] - true when the lexed tag is a closing tag
 */
var setupContextFreeTag = function(name, isClosingTag) {
  if (contextFreeTag !== "" && isClosingTag) {
    contextFreeTag = "";
  }
  var tagName = name.toLowerCase();
  // Test the cheap list membership first: `isTokenNested` searches the whole
  // input for every tag name it has not seen yet, which is wasted work for
  // tags that are not configured as context-free.
  if (contextFreeTag === "" && contextFreeTags.includes(tagName) && isTokenNested(name)) {
    contextFreeTag = tagName;
  }
};
/**
 * Builds the closing-tag text for `tagName`: open tag, slash, name, close tag.
 *
 * @param {string} tagName - tag name without delimiters
 * @returns {string} closing tag text
 */
var toEndTag = function(tagName) {
  return "".concat(openTag, _pluginhelper.SLASH, tagName, closeTag);
};
// Cursor over the whole input, shared by all state handlers. `onSkip` is passed
// so that the column counter follows the cursor (presumably invoked by the
// grabber for every skipped character - confirm in ./utils.js).
var chars = (0, _utils.createCharGrabber)(buffer, {
onSkip: onSkip
});
/**
 * Creates a token positioned at the current `row` / `prevCol`, hands it to the
 * `onToken` subscriber, then records it as the next entry of `tokens`.
 * Afterwards `prevCol` is synchronised with the current column.
 *
 * @param {number} type - token type (one of the TYPE_* constants of ./Token.js)
 * @param {string} value - token value
 * @param {number} [startPos] - start position; left undefined by some callers,
 *   in which case `createTokenOfType` uses 0
 * @param {number} [endPos] - end position; same fallback as `startPos`
 */
function emitToken(type, value, startPos, endPos) {
  var created = createTokenOfType(type, value, row, prevCol, startPos, endPos);
  // The subscriber runs before any lexer bookkeeping is updated.
  onToken(created);
  prevCol = col;
  tokens[++tokenIndex] = created;
}
/**
 * Runs one step of the attribute lexer over the text found between the open
 * and close tag characters, emits one token, and returns the sub-mode
 * (TAG_STATE_*) to use for the next step. The branch taken is selected by the
 * enclosing `tagMode` variable, which `stateAttrs` overwrites with each value
 * returned from here.
 *
 * @param {Object} tagChars - char grabber created by `stateAttrs` over the tag text
 * @param {boolean} isSingleValueTag - computed by `stateAttrs`: the text before the
 *   first "=" is not a closing tag and contains no space (e.g. `url=value`)
 * @param {number} masterStartPos - `chars.getPos()` taken by `stateAttrs` before it
 *   grabbed the tag text
 * @returns {number} TAG_STATE_NAME, TAG_STATE_ATTR or TAG_STATE_VALUE
 */
function nextTagState(tagChars, isSingleValueTag, masterStartPos) {
// Attribute sub-mode: grab text accepted by `validAttrName` (anything except "=" and whitespace).
if (tagMode === TAG_STATE_ATTR) {
var validAttrName = function(char) {
return !(char === _pluginhelper.EQ || isWhiteSpace(char));
};
var name = tagChars.grabWhile(validAttrName);
var isEnd = tagChars.isLast();
// When the character under the cursor is not "=", the grabbed text has no
// value of its own and is treated as a value itself.
var isValue = tagChars.getCurr() !== _pluginhelper.EQ;
// Presumably steps over the delimiter the grab stopped at - confirm in ./utils.js.
tagChars.skip();
// No start/end positions are passed, so `createTokenOfType` uses 0 for both.
if (isEnd || isValue) {
emitToken(_Token.TYPE_ATTR_VALUE, unq(name));
} else {
emitToken(_Token.TYPE_ATTR_NAME, name);
}
if (isEnd) {
return TAG_STATE_NAME;
}
if (isValue) {
return TAG_STATE_ATTR;
}
return TAG_STATE_VALUE;
}
// Value sub-mode: grab an attribute value, which may be wrapped in quote marks.
if (tagMode === TAG_STATE_VALUE) {
// Toggled at every quote mark that is not preceded by a backslash.
var stateSpecial = false;
var validAttrValue = function(char) {
// const isEQ = char === EQ;
var isQM = char === _pluginhelper.QUOTEMARK;
var prevChar = tagChars.getPrev();
var nextChar = tagChars.getNext();
var isPrevSLASH = prevChar === _pluginhelper.BACKSLASH;
var isNextEQ = nextChar === _pluginhelper.EQ;
var isWS = isWhiteSpace(char);
// const isPrevWS = isWhiteSpace(prevChar);
var isNextWS = !!nextChar && isWhiteSpace(nextChar);
// While the toggle is on, "=", space and tab are accepted as part of the value.
if (stateSpecial && isSpecialChar(char)) {
return true;
}
if (isQM && !isPrevSLASH) {
stateSpecial = !stateSpecial;
// A quote mark that switches the toggle off rejects the character unless
// the next character is "=" or whitespace.
if (!stateSpecial && !(isNextEQ || isNextWS)) {
return false;
}
}
// In a tag with several attributes, whitespace is rejected.
if (!isSingleValueTag) {
return !isWS;
// return (isEQ || isWS) === false;
}
return true;
};
var name1 = tagChars.grabWhile(validAttrValue);
tagChars.skip();
// No start/end positions are passed, so `createTokenOfType` uses 0 for both.
emitToken(_Token.TYPE_ATTR_VALUE, unq(name1));
// NOTE(review): presumably compensates the column for a closing quote mark
// that is not part of the emitted value - confirm.
if (tagChars.getPrev() === _pluginhelper.QUOTEMARK) {
prevCol++;
}
if (tagChars.isLast()) {
return TAG_STATE_NAME;
}
return TAG_STATE_ATTR;
}
// Name sub-mode (any other `tagMode`): read the tag name and emit the TYPE_TAG token.
var start = masterStartPos + tagChars.getPos() - 1;
var validName = function(char) {
return !(char === _pluginhelper.EQ || isWhiteSpace(char) || tagChars.isLast());
};
var name2 = tagChars.grabWhile(validName);
// The end position is derived from the length of the whole tag text, not only of the name.
emitToken(_Token.TYPE_TAG, name2, start, masterStartPos + tagChars.getLength() + 1);
// Called without `isClosingTag`, so this call can only open a context-free tag.
setupContextFreeTag(name2);
tagChars.skip();
prevCol++;
// in cases when we have [url=someval]GET[/url] and we don't need to parse all
if (isSingleValueTag) {
return TAG_STATE_VALUE;
}
// With several attributes: name/value pairs follow when the tag text contains "=".
var hasEQ = tagChars.includes(_pluginhelper.EQ);
return hasEQ ? TAG_STATE_ATTR : TAG_STATE_VALUE;
}
/**
 * Handles the cursor standing on an open-tag character.
 *
 * Emits the open tag as a plain word when what follows cannot be a tag,
 * emits a complete TYPE_TAG token for tags without attributes and for closing
 * tags, and otherwise defers to the attribute lexer.
 *
 * @returns {number} STATE_WORD, or STATE_TAG_ATTRS when the tag has attributes
 */
function stateTag() {
  var openChar = chars.getCurr();
  var following = chars.getNext();
  var startsWithReserved = Boolean(following && isCharReserved(following));
  chars.skip(); // step over the open tag
  // Covers '[My word [tag][/tag]' as well as '[My last line word'.
  var inner = chars.substrUntilChar(closeTag);
  var isMalformed = inner.length === 0 || inner.indexOf(openTag) !== -1;
  var atLastChar = chars.isLast();
  var spaceForbidden = inner.indexOf(_pluginhelper.SPACE) !== -1 && options.whitespaceInTags === false;
  if (startsWithReserved || isMalformed || atLastChar || spaceForbidden) {
    emitToken(_Token.TYPE_WORD, openChar);
    return STATE_WORD;
  }
  // [/myTag]
  var closing = inner[0] === _pluginhelper.SLASH;
  // [myTag key=value]
  var hasEq = inner.indexOf(_pluginhelper.EQ) !== -1;
  if (hasEq && !closing) {
    return STATE_TAG_ATTRS;
  }
  // [url] or [/url]: the whole text up to the close tag is the tag name.
  var startPos = chars.getPos() - 1;
  var name = chars.grabWhile(function(ch) {
    return ch !== closeTag;
  });
  var endPos = startPos + name.length + END_POS_OFFSET;
  chars.skip(); // step over the close tag
  emitToken(_Token.TYPE_TAG, name, startPos, endPos);
  setupContextFreeTag(name, closing);
  return STATE_WORD;
}
/**
 * Lexes a tag that carries attributes. Grabs the text up to the close tag,
 * wraps it in its own char grabber and drives `nextTagState` over it until it
 * is consumed, then steps over the close tag.
 *
 * @returns {number} always STATE_WORD
 */
function stateAttrs() {
  var createCharGrabber = _utils.createCharGrabber;
  var tagStartPos = chars.getPos();
  var tagText = chars.grabWhile(function(ch) {
    return ch !== closeTag;
  }, true); // second argument is the original's `silent` flag
  var tagGrabber = createCharGrabber(tagText, {
    onSkip: onSkip
  });
  // Everything before the first "=" decides which kind of tag this is.
  var head = tagText.split(_pluginhelper.EQ)[0];
  var isClosing = head[0] === _pluginhelper.SLASH;
  var headHasSpace = head.indexOf(_pluginhelper.SPACE) !== -1;
  var isSingleValueTag = !(isClosing || headHasSpace);
  tagMode = TAG_STATE_NAME;
  while (tagGrabber.hasNext()) {
    tagMode = nextTagState(tagGrabber, isSingleValueTag, tagStartPos);
  }
  chars.skip(); // step over the close tag
  return STATE_WORD;
}
/**
 * Handles everything outside of a tag. Depending on the character under the
 * cursor it emits a new-line token, a whitespace run, a single escaped
 * character or a word, or it hands over to the tag lexer.
 *
 * @returns {number} STATE_TAG when a tag should be lexed next, else STATE_WORD
 */
function stateWord() {
  var current = chars.getCurr();
  if (isNewLine(current)) {
    // The token is emitted before the cursor moves and before the counters are reset.
    emitToken(_Token.TYPE_NEW_LINE, current);
    chars.skip();
    col = 0;
    prevCol = 0;
    row += 1;
    return STATE_WORD;
  }
  if (isWhiteSpace(current)) {
    emitToken(_Token.TYPE_SPACE, chars.grabWhile(isWhiteSpace));
    return STATE_WORD;
  }
  if (current === openTag) {
    var startsTag;
    if (contextFreeTag) {
      // Inside a context-free tag only its own closing tag is lexed as a tag.
      var expectedEnd = toEndTag(contextFreeTag);
      var upcoming = chars.grabN(expectedEnd.length);
      startsTag = upcoming.toLowerCase() === expectedEnd.toLowerCase();
    } else {
      startsTag = chars.includes(closeTag);
    }
    if (startsTag) {
      return STATE_TAG;
    }
    // Not a tag: the open-tag character becomes a one-character word.
    emitToken(_Token.TYPE_WORD, chars.getCurr());
    chars.skip();
    prevCol++;
    return STATE_WORD;
  }
  if (escapeTags && isEscapeChar(current)) {
    var escaped = chars.getNext();
    chars.skip(); // drop the backslash without emitting anything yet
    if (escaped && isEscapableChar(escaped)) {
      chars.skip(); // also step over the escaped [, ] or \
      emitToken(_Token.TYPE_WORD, escaped);
    } else {
      // Nothing escapable follows: the backslash itself is the word.
      emitToken(_Token.TYPE_WORD, current);
    }
    return STATE_WORD;
  }
  // With escaping enabled a word also ends in front of a backslash.
  var acceptsChar = escapeTags ? function(ch) {
    return isCharToken(ch) && !isEscapeChar(ch);
  } : isCharToken;
  emitToken(_Token.TYPE_WORD, chars.grabWhile(acceptsChar));
  return STATE_WORD;
}
/**
 * Runs the lexer until the input is consumed: starting in word mode, it keeps
 * calling the handler that matches the current mode and adopts the mode each
 * handler returns. Finally trims the pre-sized `tokens` array to the number of
 * tokens actually emitted.
 *
 * @returns {Array} the list of emitted tokens
 */
function tokenize() {
  stateMode = STATE_WORD;
  while (chars.hasNext()) {
    if (stateMode === STATE_TAG) {
      stateMode = stateTag();
    } else if (stateMode === STATE_TAG_ATTRS) {
      stateMode = stateAttrs();
    } else {
      // STATE_WORD and any unknown mode fall back to the word handler.
      stateMode = stateWord();
    }
  }
  tokens.length = tokenIndex + 1;
  return tokens;
}
/**
 * Tells whether the closing tag for `tokenValue` occurs anywhere in the input
 * buffer. Results are cached per closing-tag text; with `caseFreeTags` the
 * search ignores letter case.
 *
 * @param {string} tokenValue - tag name without delimiters
 * @returns {boolean} true when the closing tag text is found in the buffer
 */
function isTokenNested(tokenValue) {
  var endTag = toEndTag(tokenValue);
  if (nestedMap.has(endTag)) {
    return Boolean(nestedMap.get(endTag));
  }
  var haystack = buffer;
  var needle = endTag;
  if (caseFreeTags) {
    haystack = buffer.toLowerCase();
    needle = endTag.toLowerCase();
  }
  var found = haystack.indexOf(needle) !== -1;
  nestedMap.set(endTag, found);
  return found;
}
// Public lexer API: `tokenize` runs the lexer and returns the token list,
// `isTokenNested` reports whether a tag's closing counterpart exists in the input.
return {
tokenize: tokenize,
isTokenNested: isTokenNested
};
}