UNPKG

@bbob/parser

Version:

A BBCode to AST Parser part of @bbob

359 lines (358 loc) 12.9 kB
/* eslint-disable no-plusplus,no-param-reassign */
"use strict";
Object.defineProperty(exports, "__esModule", {
    value: true
});
/**
 * Defines every key of `all` on `target` as an enumerable getter.
 * Compiler-generated helper used to publish this module's named exports.
 */
function _export(target, all) {
    for (var name in all) {
        Object.defineProperty(target, name, {
            enumerable: true,
            get: all[name]
        });
    }
}
_export(exports, {
    createLexer: function() {
        return createLexer;
    },
    createTokenOfType: function() {
        return createTokenOfType;
    }
});
var _pluginhelper = require("@bbob/plugin-helper");
var _Token = require("./Token.js");
var _utils = require("./utils.js");
// for cases <!-- -->
var EM = "!";
/**
 * Creates a Token instance; every positional argument that is left
 * undefined falls back to 0.
 *
 * @param {number} type - token type (one of the TYPE_* constants of ./Token.js)
 * @param {string} value - token value
 * @param {number} [r=0] - row
 * @param {number} [cl=0] - column
 * @param {number} [p=0] - start position
 * @param {number} [e=0] - end position
 * @returns {Token}
 */
function createTokenOfType(type, value, r, cl, p, e) {
    if (r === void 0) r = 0;
    if (cl === void 0) cl = 0;
    if (p === void 0) p = 0;
    if (e === void 0) e = 0;
    return new _Token.Token(type, value, r, cl, p, e);
}
// Top-level lexer states (see tokenize()).
var STATE_WORD = 0;
var STATE_TAG = 1;
var STATE_TAG_ATTRS = 2;
// Sub-states used while walking the inside of a tag that has attributes
// (see nextTagState()).
var TAG_STATE_NAME = 0;
var TAG_STATE_ATTR = 1;
var TAG_STATE_VALUE = 2;
var WHITESPACES = [
    _pluginhelper.SPACE,
    _pluginhelper.TAB
];
var SPECIAL_CHARS = [
    _pluginhelper.EQ,
    _pluginhelper.SPACE,
    _pluginhelper.TAB
];
var END_POS_OFFSET = 2; // length + start position offset
var isWhiteSpace = function(char) {
    return WHITESPACES.indexOf(char) >= 0;
};
var isEscapeChar = function(char) {
    return char === _pluginhelper.BACKSLASH;
};
var isSpecialChar = function(char) {
    return SPECIAL_CHARS.indexOf(char) >= 0;
};
var isNewLine = function(char) {
    return char === _pluginhelper.N;
};
// Strips the quote mark via trimChar, then passes the result through unquote.
var unq = function(val) {
    return (0, _utils.unquote)((0, _utils.trimChar)(val, _pluginhelper.QUOTEMARK));
};
/**
 * Creates a lexer over `buffer`.
 *
 * Options read by this function:
 *  - openTag / closeTag: tag delimiters (default to OPEN_BRAKET / CLOSE_BRAKET
 *    of @bbob/plugin-helper)
 *  - enableEscapeTags: when truthy, a backslash before openTag, closeTag or
 *    another backslash emits that character as a plain word
 *  - contextFreeTags: tag names whose content is emitted as words until the
 *    matching end tag is met (compared lower-cased)
 *  - caseFreeTags: when truthy, isTokenNested() compares case-insensitively
 *  - whitespaceInTags: when strictly `false`, a tag containing a space is
 *    emitted as a word instead of a tag
 *  - onToken: callback invoked with every emitted token
 *
 * @param {string} buffer - text to tokenize
 * @param {object} [options]
 * @returns {{tokenize: function(): Token[], isTokenNested: function(string): boolean}}
 */
function createLexer(buffer, options) {
    if (options === void 0) options = {};
    var row = 0;
    var prevCol = 0;
    var col = 0;
    var tokenIndex = -1;
    var stateMode = STATE_WORD;
    var tagMode = TAG_STATE_NAME;
    // Lower-cased name of the context-free tag currently open, "" when none.
    var contextFreeTag = "";
    // Pre-sized to the input length; truncated to the real token count at the
    // end of tokenize().
    var tokens = new Array(Math.floor(buffer.length));
    var openTag = options.openTag || _pluginhelper.OPEN_BRAKET;
    var closeTag = options.closeTag || _pluginhelper.CLOSE_BRAKET;
    var escapeTags = !!options.enableEscapeTags;
    var contextFreeTags = (options.contextFreeTags || []).filter(Boolean).map(function(tag) {
        return tag.toLowerCase();
    });
    var caseFreeTags = options.caseFreeTags || false;
    // Cache of isTokenNested() answers, keyed by the end-tag string.
    var nestedMap = new Map();
    // Lower-cased copy of `buffer`, built on first use by isTokenNested() so the
    // whole input is lower-cased at most once.
    var lowerBuffer = null;
    var onToken = options.onToken || function() {};
    var RESERVED_CHARS = [
        closeTag,
        openTag,
        _pluginhelper.QUOTEMARK,
        _pluginhelper.BACKSLASH,
        _pluginhelper.SPACE,
        _pluginhelper.TAB,
        _pluginhelper.EQ,
        _pluginhelper.N,
        EM
    ];
    var NOT_CHAR_TOKENS = [
        openTag,
        _pluginhelper.SPACE,
        _pluginhelper.TAB,
        _pluginhelper.N
    ];
    var isCharReserved = function(char) {
        return RESERVED_CHARS.indexOf(char) >= 0;
    };
    var isCharToken = function(char) {
        return NOT_CHAR_TOKENS.indexOf(char) === -1;
    };
    var isEscapableChar = function(char) {
        return char === openTag || char === closeTag || char === _pluginhelper.BACKSLASH;
    };
    // Passed to every char grabber as its onSkip callback; advances the column.
    var onSkip = function() {
        col++;
    };
    /**
     * Updates the context-free mode after a tag token has been emitted:
     * a closing tag leaves the mode, and a tag that is listed in
     * `contextFreeTags` and has an end tag in the buffer enters it.
     */
    var setupContextFreeTag = function(name, isClosingTag) {
        if (contextFreeTag !== "" && isClosingTag) {
            contextFreeTag = "";
        }
        var tagName = name.toLowerCase();
        // The cheap list lookup goes first so the buffer scan performed by
        // isTokenNested() only runs for tags that are actually configured.
        if (contextFreeTag === "" && contextFreeTags.includes(tagName) && isTokenNested(name)) {
            contextFreeTag = tagName;
        }
    };
    var toEndTag = function(tagName) {
        return "" + openTag + _pluginhelper.SLASH + tagName + closeTag;
    };
    var chars = (0, _utils.createCharGrabber)(buffer, {
        onSkip: onSkip
    });
    /**
     * Emits newly created token to subscriber
     *
     * @param {number} type - 1 - word, 2 - tag, 3 - attr-name, 4 - attr-value, 5 - space, 6 - new-line
     * @param {string} value - token value
     * @param {number} startPos - start position
     * @param {number} endPos - end position
     */
    function emitToken(type, value, startPos, endPos) {
        var token = createTokenOfType(type, value, row, prevCol, startPos, endPos);
        onToken(token);
        prevCol = col;
        tokenIndex += 1;
        tokens[tokenIndex] = token;
    }
    /**
     * Consumes one piece (tag name, attribute name or attribute value) of the
     * inside of a tag, emits the matching token and returns the next tag
     * sub-state.
     *
     * @param {object} tagChars - char grabber created over the tag content
     * @param {boolean} isSingleValueTag - true for tags of the form [tag=value]
     * @param {number} masterStartPos - position of the tag content in the main buffer
     * @returns {number} one of TAG_STATE_NAME / TAG_STATE_ATTR / TAG_STATE_VALUE
     */
    function nextTagState(tagChars, isSingleValueTag, masterStartPos) {
        if (tagMode === TAG_STATE_ATTR) {
            var validAttrName = function(char) {
                return !(char === _pluginhelper.EQ || isWhiteSpace(char));
            };
            var name = tagChars.grabWhile(validAttrName);
            var isEnd = tagChars.isLast();
            var isValue = tagChars.getCurr() !== _pluginhelper.EQ;
            tagChars.skip();
            // A chunk that is not followed by "=" is a value, not a name.
            if (isEnd || isValue) {
                emitToken(_Token.TYPE_ATTR_VALUE, unq(name));
            } else {
                emitToken(_Token.TYPE_ATTR_NAME, name);
            }
            if (isEnd) {
                return TAG_STATE_NAME;
            }
            if (isValue) {
                return TAG_STATE_ATTR;
            }
            return TAG_STATE_VALUE;
        }
        if (tagMode === TAG_STATE_VALUE) {
            // True while the grabber is inside a quoted section of the value.
            var stateSpecial = false;
            var validAttrValue = function(char) {
                var isQM = char === _pluginhelper.QUOTEMARK;
                var prevChar = tagChars.getPrev();
                var nextChar = tagChars.getNext();
                var isPrevSLASH = prevChar === _pluginhelper.BACKSLASH;
                var isNextEQ = nextChar === _pluginhelper.EQ;
                var isWS = isWhiteSpace(char);
                var isNextWS = !!nextChar && isWhiteSpace(nextChar);
                // "=", space and tab are part of the value while inside quotes.
                if (stateSpecial && isSpecialChar(char)) {
                    return true;
                }
                // An unescaped quote mark toggles the quoted section.
                if (isQM && !isPrevSLASH) {
                    stateSpecial = !stateSpecial;
                    if (!stateSpecial && !(isNextEQ || isNextWS)) {
                        return false;
                    }
                }
                if (!isSingleValueTag) {
                    return !isWS;
                }
                return true;
            };
            var name1 = tagChars.grabWhile(validAttrValue);
            tagChars.skip();
            emitToken(_Token.TYPE_ATTR_VALUE, unq(name1));
            if (tagChars.getPrev() === _pluginhelper.QUOTEMARK) {
                prevCol++;
            }
            if (tagChars.isLast()) {
                return TAG_STATE_NAME;
            }
            return TAG_STATE_ATTR;
        }
        // TAG_STATE_NAME: read the tag name itself.
        var start = masterStartPos + tagChars.getPos() - 1;
        var validName = function(char) {
            return !(char === _pluginhelper.EQ || isWhiteSpace(char) || tagChars.isLast());
        };
        var name2 = tagChars.grabWhile(validName);
        emitToken(_Token.TYPE_TAG, name2, start, masterStartPos + tagChars.getLength() + 1);
        setupContextFreeTag(name2);
        tagChars.skip();
        prevCol++;
        // in cases when we have [url=someval]GET[/url] and we don't need to parse all
        if (isSingleValueTag) {
            return TAG_STATE_VALUE;
        }
        var hasEQ = tagChars.includes(_pluginhelper.EQ);
        return hasEQ ? TAG_STATE_ATTR : TAG_STATE_VALUE;
    }
    /**
     * Handles an opening bracket: emits it as a word when what follows cannot
     * be a tag, emits a whole tag token for attribute-less or closing tags,
     * or hands over to stateAttrs() for tags with attributes.
     *
     * @returns {number} next lexer state
     */
    function stateTag() {
        var currChar = chars.getCurr();
        var nextChar = chars.getNext();
        var isNextCharReserved = Boolean(nextChar && isCharReserved(nextChar));
        chars.skip(); // skip openTag
        // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
        var substr = chars.substrUntilChar(closeTag);
        var hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;
        var isLastChar = chars.isLast();
        var hasSpace = substr.indexOf(_pluginhelper.SPACE) >= 0;
        var isSpaceRestricted = hasSpace && options.whitespaceInTags === false;
        if (isNextCharReserved || hasInvalidChars || isLastChar || isSpaceRestricted) {
            emitToken(_Token.TYPE_WORD, currChar);
            return STATE_WORD;
        }
        // [myTag ]
        var isNoAttrsInTag = substr.indexOf(_pluginhelper.EQ) === -1;
        // [/myTag]
        var isClosingTag = substr[0] === _pluginhelper.SLASH;
        // [url] or [/url]
        if (isNoAttrsInTag || isClosingTag) {
            var startPos = chars.getPos() - 1;
            var name = chars.grabWhile(function(char) {
                return char !== closeTag;
            });
            var endPos = startPos + name.length + END_POS_OFFSET;
            chars.skip(); // skip closeTag
            emitToken(_Token.TYPE_TAG, name, startPos, endPos);
            setupContextFreeTag(name, isClosingTag);
            return STATE_WORD;
        }
        return STATE_TAG_ATTRS;
    }
    /**
     * Tokenizes the inside of a tag that carries attributes by running
     * nextTagState() over a dedicated grabber until it is exhausted.
     *
     * @returns {number} next lexer state (always STATE_WORD)
     */
    function stateAttrs() {
        var startPos = chars.getPos();
        var silent = true;
        var tagStr = chars.grabWhile(function(char) {
            return char !== closeTag;
        }, silent);
        var tagGrabber = (0, _utils.createCharGrabber)(tagStr, {
            onSkip: onSkip
        });
        var eqParts = tagStr.split(_pluginhelper.EQ);
        var tagName = eqParts[0];
        var isEndTag = tagName[0] === _pluginhelper.SLASH;
        // No space before the first "=" means the [tag=value] form.
        var isSingleAttrTag = tagName.indexOf(_pluginhelper.SPACE) === -1;
        var isSingleValueTag = !isEndTag && isSingleAttrTag;
        tagMode = TAG_STATE_NAME;
        while (tagGrabber.hasNext()) {
            tagMode = nextTagState(tagGrabber, isSingleValueTag, startPos);
        }
        chars.skip(); // skip closeTag
        return STATE_WORD;
    }
    /**
     * Handles everything outside of a tag: new lines, whitespace runs,
     * opening brackets, escape sequences (when enabled) and plain words.
     *
     * @returns {number} next lexer state
     */
    function stateWord() {
        if (isNewLine(chars.getCurr())) {
            emitToken(_Token.TYPE_NEW_LINE, chars.getCurr());
            chars.skip();
            col = 0;
            prevCol = 0;
            row++;
            return STATE_WORD;
        }
        if (isWhiteSpace(chars.getCurr())) {
            var word = chars.grabWhile(isWhiteSpace);
            emitToken(_Token.TYPE_SPACE, word);
            return STATE_WORD;
        }
        if (chars.getCurr() === openTag) {
            if (contextFreeTag) {
                // Inside a context-free tag only its own end tag is treated as a tag.
                var fullTagName = toEndTag(contextFreeTag);
                var foundTag = chars.grabN(fullTagName.length);
                var isContextFreeEnded = foundTag.toLowerCase() === fullTagName.toLowerCase();
                if (isContextFreeEnded) {
                    return STATE_TAG;
                }
            } else if (chars.includes(closeTag)) {
                return STATE_TAG;
            }
            emitToken(_Token.TYPE_WORD, chars.getCurr());
            chars.skip();
            prevCol++;
            return STATE_WORD;
        }
        if (escapeTags) {
            if (isEscapeChar(chars.getCurr())) {
                var currChar = chars.getCurr();
                var nextChar = chars.getNext();
                chars.skip(); // skip the \ without emitting anything
                if (nextChar && isEscapableChar(nextChar)) {
                    chars.skip(); // skip past the [, ] or \ as well
                    emitToken(_Token.TYPE_WORD, nextChar);
                    return STATE_WORD;
                }
                emitToken(_Token.TYPE_WORD, currChar);
                return STATE_WORD;
            }
            var isChar = function(char) {
                return isCharToken(char) && !isEscapeChar(char);
            };
            var word1 = chars.grabWhile(isChar);
            emitToken(_Token.TYPE_WORD, word1);
            return STATE_WORD;
        }
        var word2 = chars.grabWhile(isCharToken);
        emitToken(_Token.TYPE_WORD, word2);
        return STATE_WORD;
    }
    /**
     * Runs the state machine over the whole buffer.
     *
     * @returns {Token[]} the emitted tokens, in order
     */
    function tokenize() {
        stateMode = STATE_WORD;
        while (chars.hasNext()) {
            switch (stateMode) {
                case STATE_TAG:
                    stateMode = stateTag();
                    break;
                case STATE_TAG_ATTRS:
                    stateMode = stateAttrs();
                    break;
                case STATE_WORD:
                default:
                    stateMode = stateWord();
                    break;
            }
        }
        // Drop the unused tail of the pre-sized array.
        tokens.length = tokenIndex + 1;
        return tokens;
    }
    /**
     * Tells whether the end tag built from `tokenValue` occurs anywhere in the
     * buffer. Answers are cached per end-tag string.
     *
     * @param {string} tokenValue - tag name
     * @returns {boolean}
     */
    function isTokenNested(tokenValue) {
        var value = toEndTag(tokenValue);
        if (nestedMap.has(value)) {
            return !!nestedMap.get(value);
        }
        var haystack = buffer;
        var needle = value;
        if (caseFreeTags) {
            if (lowerBuffer === null) {
                lowerBuffer = buffer.toLowerCase();
            }
            haystack = lowerBuffer;
            needle = value.toLowerCase();
        }
        var status = haystack.indexOf(needle) > -1;
        nestedMap.set(value, status);
        return status;
    }
    return {
        tokenize: tokenize,
        isTokenNested: isTokenNested
    };
}