
@bbob/parser


Parses BBCode into an AST array. Part of the @bbob BBCode parser.
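For context, here is a minimal sketch of how the lexer defined in this file can be driven directly. The import path is an assumption (this module's file name is not shown on this page), and the sample input is illustrative; the token types match the constants imported from ./Token below.

import { createLexer } from './lexer'; // path assumed

// Tokenize a small BBCode string into the flat token stream
// that the parser later folds into an AST.
const lexer = createLexer('[b]bold[/b]');
const tokens = lexer.tokenize();
// tokens is a flat array of Token instances: a TYPE_TAG 'b',
// a TYPE_WORD 'bold', and a TYPE_TAG '/b', each carrying the
// row/column position where it started.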

Source: 243 lines (201 loc), 7 kB
/* eslint-disable no-plusplus,no-param-reassign */
import {
  OPEN_BRAKET,
  CLOSE_BRAKET,
  QUOTEMARK,
  BACKSLASH,
  SLASH,
  SPACE,
  TAB,
  EQ,
  N,
} from '@bbob/plugin-helper/lib/char';

import {
  Token,
  TYPE_ATTR_NAME,
  TYPE_ATTR_VALUE,
  TYPE_NEW_LINE,
  TYPE_SPACE,
  TYPE_TAG,
  TYPE_WORD,
} from './Token';

import { createCharGrabber, trimChar, unquote } from './utils';

// for cases <!-- -->
const EM = '!';

/**
 * Creates a Token entity class
 * @param {String} type
 * @param {String} value
 * @param {Number} r line number
 * @param {Number} cl char number in line
 */
const createToken = (type, value, r = 0, cl = 0) => new Token(type, value, r, cl);

/**
 * @typedef {Object} Lexer
 * @property {Function} tokenize
 * @property {Function} isTokenNested
 */

/**
 * @param {String} buffer
 * @param {Object} options
 * @param {Function} options.onToken
 * @param {String} options.openTag
 * @param {String} options.closeTag
 * @param {Boolean} options.enableEscapeTags
 * @return {Lexer}
 */
function createLexer(buffer, options = {}) {
  let row = 0;
  let col = 0;

  let tokenIndex = -1;
  const tokens = new Array(Math.floor(buffer.length));
  const openTag = options.openTag || OPEN_BRAKET;
  const closeTag = options.closeTag || CLOSE_BRAKET;
  const escapeTags = options.enableEscapeTags;

  const RESERVED_CHARS = [closeTag, openTag, QUOTEMARK, BACKSLASH, SPACE, TAB, EQ, N, EM];
  const NOT_CHAR_TOKENS = [
    // ...(options.enableEscapeTags ? [BACKSLASH] : []),
    openTag, SPACE, TAB, N,
  ];
  const WHITESPACES = [SPACE, TAB];
  const SPECIAL_CHARS = [EQ, SPACE, TAB];

  const isCharReserved = (char) => (RESERVED_CHARS.indexOf(char) >= 0);
  const isWhiteSpace = (char) => (WHITESPACES.indexOf(char) >= 0);
  const isCharToken = (char) => (NOT_CHAR_TOKENS.indexOf(char) === -1);
  const isSpecialChar = (char) => (SPECIAL_CHARS.indexOf(char) >= 0);
  const isEscapableChar = (char) => (char === openTag || char === closeTag || char === BACKSLASH);
  const isEscapeChar = (char) => char === BACKSLASH;

  /**
   * Emits newly created token to subscriber
   * @param token
   */
  const emitToken = (token) => {
    if (options.onToken) {
      options.onToken(token);
    }

    tokenIndex += 1;
    tokens[tokenIndex] = token;
  };

  /**
   * Parses params inside [myTag---params goes here---]content[/myTag]
   * @param str
   * @returns {{tag: *, attrs: Array}}
   */
  const parseAttrs = (str) => {
    let tagName = null;
    let skipSpecialChars = false;

    const attrTokens = [];
    const attrCharGrabber = createCharGrabber(str);

    const validAttr = (char) => {
      const isEQ = char === EQ;
      const isWS = isWhiteSpace(char);
      const prevChar = attrCharGrabber.getPrev();
      const nextChar = attrCharGrabber.getNext();
      const isPrevSLASH = prevChar === BACKSLASH;
      const isTagNameEmpty = tagName === null;

      if (isTagNameEmpty) {
        return (isEQ || isWS || attrCharGrabber.isLast()) === false;
      }

      if (skipSpecialChars && isSpecialChar(char)) {
        return true;
      }

      if (char === QUOTEMARK && !isPrevSLASH) {
        skipSpecialChars = !skipSpecialChars;

        if (!skipSpecialChars && !(nextChar === EQ || isWhiteSpace(nextChar))) {
          return false;
        }
      }

      return (isEQ || isWS) === false;
    };

    const nextAttr = () => {
      const attrStr = attrCharGrabber.grabWhile(validAttr);
      const currChar = attrCharGrabber.getCurr();

      // first string before space is a tag name [tagName params...]
      if (tagName === null) {
        tagName = attrStr;
      } else if (isWhiteSpace(currChar) || currChar === QUOTEMARK || !attrCharGrabber.hasNext()) {
        const escaped = unquote(trimChar(attrStr, QUOTEMARK));
        attrTokens.push(createToken(TYPE_ATTR_VALUE, escaped, row, col));
      } else {
        attrTokens.push(createToken(TYPE_ATTR_NAME, attrStr, row, col));
      }

      attrCharGrabber.skip();
    };

    while (attrCharGrabber.hasNext()) {
      nextAttr();
    }

    return { tag: tagName, attrs: attrTokens };
  };

  const bufferGrabber = createCharGrabber(buffer, {
    onSkip: () => {
      col++;
    },
  });

  const next = () => {
    const currChar = bufferGrabber.getCurr();
    const nextChar = bufferGrabber.getNext();

    if (currChar === N) {
      bufferGrabber.skip();
      col = 0;
      row++;

      emitToken(createToken(TYPE_NEW_LINE, currChar, row, col));
    } else if (isWhiteSpace(currChar)) {
      const str = bufferGrabber.grabWhile(isWhiteSpace);
      emitToken(createToken(TYPE_SPACE, str, row, col));
    } else if (escapeTags && isEscapeChar(currChar) && isEscapableChar(nextChar)) {
      bufferGrabber.skip(); // skip the \ without emitting anything
      bufferGrabber.skip(); // skip past the [, ] or \ as well

      emitToken(createToken(TYPE_WORD, nextChar, row, col));
    } else if (currChar === openTag) {
      bufferGrabber.skip(); // skip openTag

      // detect case where we have '[My word [tag][/tag]' or we have '[My last line word'
      const substr = bufferGrabber.substrUntilChar(closeTag);
      const hasInvalidChars = substr.length === 0 || substr.indexOf(openTag) >= 0;

      if (isCharReserved(nextChar) || hasInvalidChars || bufferGrabber.isLast()) {
        emitToken(createToken(TYPE_WORD, currChar, row, col));
      } else {
        const str = bufferGrabber.grabWhile((val) => val !== closeTag);

        bufferGrabber.skip(); // skip closeTag

        // [myTag ]
        const isNoAttrsInTag = str.indexOf(EQ) === -1;
        // [/myTag]
        const isClosingTag = str[0] === SLASH;

        if (isNoAttrsInTag || isClosingTag) {
          emitToken(createToken(TYPE_TAG, str, row, col));
        } else {
          const parsed = parseAttrs(str);

          emitToken(createToken(TYPE_TAG, parsed.tag, row, col));

          parsed.attrs.map(emitToken);
        }
      }
    } else if (currChar === closeTag) {
      bufferGrabber.skip(); // skip closeTag

      emitToken(createToken(TYPE_WORD, currChar, row, col));
    } else if (isCharToken(currChar)) {
      if (escapeTags && isEscapeChar(currChar) && !isEscapableChar(nextChar)) {
        bufferGrabber.skip();
        emitToken(createToken(TYPE_WORD, currChar, row, col));
      } else {
        const str = bufferGrabber.grabWhile((char) => {
          if (escapeTags) {
            return isCharToken(char) && !isEscapeChar(char);
          }
          return isCharToken(char);
        });

        emitToken(createToken(TYPE_WORD, str, row, col));
      }
    }
  };

  const tokenize = () => {
    while (bufferGrabber.hasNext()) {
      next();
    }

    tokens.length = tokenIndex + 1;

    return tokens;
  };

  const isTokenNested = (token) => {
    const value = openTag + SLASH + token.getValue();
    // potential bottleneck
    return buffer.indexOf(value) > -1;
  };

  return {
    tokenize,
    isTokenNested,
  };
}

export const createTokenOfType = createToken;
export { createLexer };
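As a further sketch, the options object lets a caller stream tokens as they are emitted and opt into backslash escaping. The import path and sample strings are again assumptions; the option names (onToken, enableEscapeTags) come from the JSDoc of createLexer above.

import { createLexer } from './lexer'; // path assumed

// onToken fires for each token as it is emitted, before tokenize()
// returns the collected array, so large inputs can be handled as a stream.
const lexer = createLexer('\\[not-a-tag\\] [quote name=bob]hi[/quote]', {
  enableEscapeTags: true, // treat \[ \] \\ as literal word characters
  onToken: (token) => console.log(token),
});

lexer.tokenize();
// Emits: WORD '[', WORD 'not-a-tag', WORD ']', SPACE ' ',
// TAG 'quote' with ATTR_NAME 'name' / ATTR_VALUE 'bob',
// WORD 'hi', TAG '/quote'.

// isTokenNested(token) checks whether a matching closing tag such as
// [/quote] appears anywhere in the buffer; as the inline comment notes,
// this is a plain indexOf scan and a potential bottleneck on large input.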